LLM_Engineering_OLD/week1/community-contributions/day2-webpage-summarizer-ollama-gemini.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1faf8b29-2ba6-40c7-89ee-71f71e234f11",
   "metadata": {},
   "source": [
    "## Extra requirements\n",
    "```bash\n",
    "pip install -q -U google-genai\n",
    "```\n",
    "\n",
    "## Required environment variable\n",
    "GEMINI_API_KEY\n",
    "\n",
    "### How to get GEMINI API KEY\n",
    "\n",
    "Use the link: [gemini api key](https://aistudio.google.com/app/apikey) to get yours."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "be06ce76-20ee-4066-9582-a4ed745f278f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from dotenv import load_dotenv\n",
    "from google import genai\n",
    "from google.genai import types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "99e42519-5dac-4b13-8a26-8a635753343b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def gemini_invoke(website):\n",
    "    load_dotenv()\n",
    "    api_key = os.getenv(\"GEMINI_API_KEY\")\n",
    "    if not api_key or len(api_key) < 39:\n",
    "        print(\"No correct api key was found\")\n",
    "        return\n",
    "    else:\n",
    "        print(\"Api key found. Good to go!\")\n",
    "        client = genai.Client(api_key=api_key)\n",
    "        response = client.models.generate_content(\n",
    "            model=\"gemini-2.0-flash\",\n",
    "            config=types.GenerateContentConfig(\n",
    "                system_instruction=system_prompt),\n",
    "                contents=user_prompt_for(website)\n",
    "            )\n",
    "        return response.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "95a6ece8-8402-4cad-96b9-36a6ea444c54",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Website:\n",
    "    url: str\n",
    "    title: str\n",
    "    text: str\n",
    "\n",
    "    def __init__(self, url):\n",
    "        self.url = url\n",
    "        response = requests.get(url)\n",
    "        soup = BeautifulSoup(response.content, \"html.parser\")\n",
    "        self.title = soup.title.string if soup.title else \"No title was found\"\n",
    "\n",
    "        for irr in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "            irr.decompose()\n",
    "        self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24bbd1dd-dca4-4bbc-ae91-4bad227a4278",
   "metadata": {},
   "outputs": [],
   "source": [
    "ed = Website(\"https://edwarddonner.com\")\n",
    "print(ed.title)\n",
    "print(ed.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "233b8904-7a4a-4265-8b0d-20934ae4b29c",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
    "and provides a short summary, ignoring text that navigation related. Respond \\\n",
    "in markdown.\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5c996c03-84ab-4378-8a55-026d94404d35",
   "metadata": {},
   "outputs": [],
   "source": [
    "messages = [{\"role\": \"user\", \"content\": system_prompt}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "abf9464e-dc8d-4099-aeb6-495498326673",
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_prompt_for(website):\n",
    "    user_prompt = f\"You are looking at a website titled {website.title}\"\n",
    "    user_prompt += \"\\nThe contents of this website is as follows; \\\n",
    "please provide a short summary of this website in markdown. \\\n",
    "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
    "    user_prompt += website.text\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "32ab2d29-02d1-43c5-b920-f2621f292b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "def summarize(url, model=\"gemini\"):\n",
    "    website = Website(url)\n",
    "    if model == \"ollama\":\n",
    "        import ollama\n",
    "        Model=\"llama3.2\"\n",
    "        messages[0][\"content\"] += f\" Website: {url}\"\n",
    "        response = ollama.chat(model=Model, messages=messages)\n",
    "        return response[\"message\"][\"content\"]\n",
    "    else:\n",
    "        return gemini_invoke(website)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2a0e518-7198-489d-a0ce-2eec617f939f",
   "metadata": {},
   "outputs": [],
   "source": [
    "summarize(\"https://edwarddonner.com\", \"ollama\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}