{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "3ba06289-d17a-4ccd-85f5-2b79956d4e59", "metadata": {}, "outputs": [], "source": [ "!pip install selenium" ] }, { "cell_type": "code", "execution_count": null, "id": "cb6636be-e43f-4896-aadd-cafda003ed4e", "metadata": {}, "outputs": [], "source": [ "!pip install -q -U google-genai" ] }, { "cell_type": "code", "execution_count": null, "id": "dfe66209-1d33-4292-80f1-20e11baf4bc3", "metadata": {}, "outputs": [], "source": [ "from selenium import webdriver\n", "from selenium.webdriver.chrome.options import Options\n", "from selenium.webdriver.chrome.service import Service\n", "from bs4 import BeautifulSoup\n", "import time\n", "import os\n", "from dotenv import load_dotenv\n", "from IPython.display import Markdown, display\n", "from google import genai\n", "from google.genai import types\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f2b4306c-17d0-46fe-a889-7440ff809dc6", "metadata": {}, "outputs": [], "source": [ "\n", "#load env\n", "load_dotenv(override=True)\n", "api_key = os.getenv('GEMINI_API_KEY')\n", "\n", "# Check the key\n", "\n", "if not api_key:\n", " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", "elif api_key.strip() != api_key:\n", " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", "else:\n", " print(\"API key found and looks good so far!\")" ] }, { "cell_type": "markdown", "id": "08ec6fec-886c-4a0c-a046-e8643ad700d3", "metadata": {}, "source": [ "# Lets make a simple call for check our model is working fine or not" ] }, { "cell_type": "code", "execution_count": null, "id": "89143d5c-0013-4f7e-8e1f-f7db7e936f0d", "metadata": {}, "outputs": [], "source": [ "client = genai.Client(api_key=api_key)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1144b77a-6785-479a-ab4f-bb0ab5624b49", "metadata": {}, "outputs": [], "source": [ "\n", "response = client.models.generate_content(\n", " model=\"gemini-2.5-flash-preview-05-20\",\n", " contents=[\"hi gemini\"]\n", ")\n", "print(response.text)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "bbf3836c-19b8-44e1-904a-f265925c2786", "metadata": {}, "outputs": [], "source": [ "\n", "class Website:\n", " def __init__(self, url, driver_path=None, wait_time=3):\n", " self.url = url\n", " self.wait_time = wait_time\n", "\n", " # Headless Chrome settings\n", " options = Options()\n", " # options.add_argument(\"--headless\") \n", " # Headless mode runs the browser in the background (invisible).\n", " # However, some websites (like openai.com) block headless browsers.\n", " # So if this line is active, the page may not load correctly and you may not get the full content.\n", " options.add_argument(\"--disable-gpu\")\n", " options.add_argument(\"--no-sandbox\")\n", " options.add_argument(\"--window-size=1920x1080\")\n", "\n", " # Driver path\n", " if driver_path:\n", " service = Service(executable_path=driver_path)\n", " else:\n", " service = Service() \n", "\n", " # Start browser\n", " driver = webdriver.Chrome(service=service, options=options)\n", " driver.get(url)\n", "\n", " # Wait for the loading page\n", " time.sleep(self.wait_time)\n", "\n", " # Take page source\n", " html = driver.page_source\n", " driver.quit()\n", "\n", " # Analysis with BeautifulSoup \n", " soup = BeautifulSoup(html, 'html.parser')\n", " self.title = soup.title.string if soup.title else \"No title found\"\n", "\n", " # Clean irrelevant tags\n", " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", " irrelevant.decompose()\n", "\n", " self.text = soup.body.get_text(separator=\"\\n\", strip=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "852c52e2-bd4d-4bb9-94ef-e498c33f1a89", "metadata": {}, "outputs": [], "source": [ "system_prompt = \"\"\"You are an academic research assistant specialized in summarizing scholarly papers. Follow this workflow rigorously:\n", "\n", "Step 1: Document Verification\n", "Verify if the input is a research paper by checking for:\n", "\n", "Presence of academic sections (Abstract, Introduction, Methodology, Results, Discussion, References)\n", "\n", "Technical/scholarly language\n", "\n", "Citations (in-text or bibliography)\n", "\n", "Research claims or data analysis\n", "If NOT a research paper:\n", "→ Respond: \"This doesn't appear to be a research paper. Please upload peer-reviewed academic literature for summarization.\"\n", "\n", "Step 2: Structured Summary (If verified)\n", "Generate a 5-section summary in this exact format:\n", "\n", "1. Research Question\n", "[Identify core problem/gap addressed in 1 sentence]\n", "\n", "2. Methodology\n", "[Study design, data sources, analytical techniques in 2 bullet points]\n", "\n", "3. Key Findings\n", "[3-4 quantified results with numerical evidence from tables/figures]\n", "\n", "4. Limitations\n", "[2 major constraints acknowledged by authors]\n", "\n", "5. Significance\n", "[Impact on field & practical implications in 1 sentence]\n", "\n", "Critical Rules:\n", "Accuracy Priority: Never invent data. Write \"Not specified\" for missing elements\n", "\n", "Source Anchoring: Cite page/paragraph numbers for claims (e.g., \"Fig 3 shows 24% improvement\")\n", "\n", "Jargon Handling: Simplify complex terms using: [Technical Term → Layman Explanation] inline\n", "\n", "Bias Alert: Flag any undeclared funding/sponsorship conflicts\n", "\n", "Output Format: Strict Markdown with section headers, 200-word maximum\n", "\n", "Example Output:\n", "1. Research Question\n", "How does microplastic concentration affect zebrafish neural development?\n", "\n", "2. Methodology\n", "\n", "Exposed embryos to 0.1-10μm PET particles (5-100mg/L) for 96h\n", "\n", "Quantified gene expression (RT-qPCR) and behavioral assays (Open Field Test)\n", "\n", "3. Key Findings\n", "▲ 40% reduction in neuron count at 50mg/L exposure (p<0.01, Fig 2B)\n", "■ 2.3x increase in anxiolytic behavior (Table 3)\n", "▼ 17% downregulation in shha expression (p=0.03)\n", "\n", "4. Limitations\n", " \n", "Used static exposure vs dynamic aquatic environments\n", "\n", "Limited proteomic validation\n", "\n", "5. Significance\n", "Establishes dose-dependent neurotoxicity thresholds for aquatic toxicology regulations.\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": "7620c685-c35c-4d6b-aaf1-a3da98f19ca7", "metadata": {}, "outputs": [], "source": [ "# A function that writes a User Prompt that asks for summaries of websites:\n", "\n", "def user_prompt_for(website):\n", " user_prompt = f\"You are looking at a website titled {website.title}\"\n", " user_prompt += \"\\nThe contents of this website is as follows; \\\n", "please provide a summary of this website in markdown.\\n\\n\"\n", " user_prompt += website.text\n", " return user_prompt" ] }, { "cell_type": "code", "execution_count": null, "id": "a4257406-089b-45a3-bfb5-272004360a49", "metadata": {}, "outputs": [], "source": [ "def summarize(url):\n", " website = Website(url)\n", " response = client.models.generate_content(\n", " model=\"gemini-2.5-flash-preview-05-20\",\n", " config=types.GenerateContentConfig(\n", " system_instruction=system_prompt),\n", " contents=user_prompt_for(website)\n", " )\n", "\n", " return response.text\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f68b32ae-9e65-4aa4-ae8d-cc2482c4a2e2", "metadata": {}, "outputs": [], "source": [ "def display_summary(url):\n", " summary = summarize(url)\n", " display(Markdown(summary))\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "ae52543c-01c1-4262-b53c-95ef4e5a93aa", "metadata": {}, "outputs": [], "source": [ "display_summary(\"https://onlinelibrary.wiley.com/doi/full/10.1155/2021/8812542\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }