187 lines
5.1 KiB
Plaintext
187 lines
5.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1faf8b29-2ba6-40c7-89ee-71f71e234f11",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Extra requirements\n",
|
|
"```bash\n",
|
|
"pip install -q -U google-genai\n",
|
|
"```\n",
|
|
"\n",
|
|
"## Required environment variable\n",
|
|
"GEMINI_API_KEY\n",
|
|
"\n",
|
|
"### How to get GEMINI API KEY\n",
|
|
"\n",
|
|
"Use the link: [gemini api key](https://aistudio.google.com/app/apikey) to get yours."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "be06ce76-20ee-4066-9582-a4ed745f278f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import requests\n",
|
|
"from bs4 import BeautifulSoup\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from google import genai\n",
|
|
"from google.genai import types"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "99e42519-5dac-4b13-8a26-8a635753343b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def gemini_invoke(website):\n",
"    \"\"\"Ask Gemini for a summary of `website` and return the reply text.\n",
"\n",
"    Calls `load_dotenv()` and then reads GEMINI_API_KEY from the environment.\n",
"    A status message is printed either way. When the key is missing or\n",
"    shorter than 39 characters, nothing is sent and None is returned.\n",
"    \"\"\"\n",
"    load_dotenv()\n",
"    gemini_key = os.getenv(\"GEMINI_API_KEY\")\n",
"    # Guard clause: stop before creating a client with an unusable key.\n",
"    if not gemini_key or len(gemini_key) < 39:\n",
"        print(\"No correct api key was found\")\n",
"        return None\n",
"    print(\"Api key found. Good to go!\")\n",
"\n",
"    gemini_client = genai.Client(api_key=gemini_key)\n",
"    generation_config = types.GenerateContentConfig(system_instruction=system_prompt)\n",
"    prompt = user_prompt_for(website)\n",
"    reply = gemini_client.models.generate_content(\n",
"        model=\"gemini-2.0-flash\",\n",
"        config=generation_config,\n",
"        contents=prompt,\n",
"    )\n",
"    return reply.text"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "95a6ece8-8402-4cad-96b9-36a6ea444c54",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class Website:\n",
"    \"\"\"A web page fetched once and reduced to its title and visible text.\"\"\"\n",
"\n",
"    url: str\n",
"    title: str\n",
"    text: str\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"Download `url` and extract the page title and body text.\n",
"\n",
"        Args:\n",
"            url: Address of the page to fetch.\n",
"\n",
"        Exceptions raised by `requests.get` propagate to the caller.\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url)\n",
"        soup = BeautifulSoup(response.content, \"html.parser\")\n",
"\n",
"        # `.string` is None when <title> is empty or contains nested tags,\n",
"        # so check it as well as the tag itself before using it.\n",
"        has_title = soup.title is not None and soup.title.string is not None\n",
"        self.title = soup.title.string if has_title else \"No title was found\"\n",
"\n",
"        body = soup.body\n",
"        if body is None:\n",
"            # No <body> (empty or non-HTML response): nothing to extract.\n",
"            self.text = \"\"\n",
"            return\n",
"\n",
"        # Remove elements that carry no readable content before extracting text.\n",
"        for irr in body([\"script\", \"style\", \"img\", \"input\"]):\n",
"            irr.decompose()\n",
"        self.text = body.get_text(separator=\"\\n\", strip=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "24bbd1dd-dca4-4bbc-ae91-4bad227a4278",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Smoke test: scrape one page and show its title followed by the extracted text.\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title, ed.text, sep=\"\\n\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "233b8904-7a4a-4265-8b0d-20934ae4b29c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
|
"and provides a short summary, ignoring text that navigation related. Respond \\\n",
|
|
"in markdown.\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "5c996c03-84ab-4378-8a55-026d94404d35",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# One-message chat history: the summarization instructions, sent with the user role.\n",
"messages = [dict(role=\"user\", content=system_prompt)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "abf9464e-dc8d-4099-aeb6-495498326673",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def user_prompt_for(website):\n",
|
|
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
|
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
|
"please provide a short summary of this website in markdown. \\\n",
|
|
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
|
" user_prompt += website.text\n",
|
|
" return user_prompt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "32ab2d29-02d1-43c5-b920-f2621f292b23",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def summarize(url, model=\"gemini\"):\n",
"    \"\"\"Fetch `url` and return a model-written summary of the page.\n",
"\n",
"    Args:\n",
"        url: Address of the page to summarize.\n",
"        model: \"ollama\" runs llama3.2 through the local Ollama client; any\n",
"            other value (default \"gemini\") goes through `gemini_invoke`.\n",
"\n",
"    Returns:\n",
"        The reply text from Ollama, or whatever `gemini_invoke` returns.\n",
"    \"\"\"\n",
"    website = Website(url)\n",
"    if model != \"ollama\":\n",
"        return gemini_invoke(website)\n",
"\n",
"    # Imported lazily so the Gemini path works without the ollama package.\n",
"    import ollama\n",
"\n",
"    # Build a fresh conversation for every call. Appending to the shared\n",
"    # `messages` list made each call carry the URLs of all earlier calls, and\n",
"    # it sent only the URL; the scraped page text is what the model needs.\n",
"    chat_messages = [\n",
"        {\"role\": \"system\", \"content\": system_prompt},\n",
"        {\"role\": \"user\", \"content\": user_prompt_for(website)},\n",
"    ]\n",
"    response = ollama.chat(model=\"llama3.2\", messages=chat_messages)\n",
"    return response[\"message\"][\"content\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a2a0e518-7198-489d-a0ce-2eec617f939f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Summarize the page through the local Ollama path.\n",
"summarize(\"https://edwarddonner.com\", model=\"ollama\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|