From a5dceceeed46ebca65b24362f461a9fb67e08ef6 Mon Sep 17 00:00:00 2001 From: Rob Godbey Date: Fri, 24 Oct 2025 10:38:16 -0400 Subject: [PATCH 1/2] Adding my Lab2 --- .../wk1-day2-RBG-all-sites-ollama.ipynb | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb diff --git a/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb new file mode 100644 index 0000000..a1204f7 --- /dev/null +++ b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", + "metadata": {}, + "source": [ + "# Lab2: Local Open Source on My PC Project\n", + "## Summarize All Websites without Selenium Using Open Source Models\n", + "This builds on my app from yesterday using Jina (https://jina.ai/reader) to turn all websites into markdown before summarizing by an LLM. 
It also uses Ollama to store and run open-source LLMs locally on my PC (Jina itself is not local, so for a fully local setup you might need to go back to Selenium to handle JavaScript sites).\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b87cadb-d513-4303-baee-a37b6f938e4d", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup access to the Ollama models\n", + "\n", + "OLLAMA_BASE_URL = \"http://localhost:11434/v1\"\n", + "\n", + "ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')\n", + "\n", + "omodel = \"llama3.2\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1-a: Define the user prompt\n", + "\n", + "user_prompt_prefix = \"\"\"\n", + "Here are the contents of a website.\n", + "Provide a short summary of this website.\n", + "If it includes news or announcements, then summarize these too.\n", + "Make recommendations for improvement\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abdb8417-c5dc-44bc-9bee-2e059d162699", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1-b: Define the system prompt\n", + "\n", + "system_prompt = \"\"\"You are to act like a smart Mckinsey Consultant specializing in website analysis. 
\n", + "1) You should provide a short, clear, summary, ignoring text that might be navigation related.\n", + "2) Follow the summary by making recommendations for improving the website so it is better at serving its purpose.\n", + "3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n", + "4) If possible try to group you recommendations, for example Grammar and Style, Clarity, Functional, etc.\n", + "5) Give confidence scores with every recommendation.\n", + "6) Always provide a summary of the website, explaining what it is.\n", + "7) if you do not understand the website's purpose or have no improvement recommendations, give out an error message along the lines of more data required for analysis or ask a follow up question.\n", + "8) Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88", + "metadata": {}, + "outputs": [], + "source": [ + "# Add the website content to the user prompt\n", + "\n", + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 5: Change the content utility to use jina\n", + "\n", + "def fetch_url_content(url):\n", + " jina_reader_url = f\"https://r.jina.ai/{url}\"\n", + " try:\n", + " response = requests.get(jina_reader_url)\n", + " response.raise_for_status() # Raise an exception for HTTP errors\n", + " return response.text\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"Error fetching URL: {e}\")\n", + " return None\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"905b9919-aba7-45b5-ae65-81b3d1d78e34", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Call Ollama model & Step 4: print the result\n", + "\n", + "def summarize(url):\n", + " website = fetch_url_content(url)\n", + " response = ollama.chat.completions.create(\n", + " model = omodel,\n", + " messages = messages_for(website)\n", + " )\n", + " summary = response.choices[0].message.content\n", + " return display(Markdown(summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75df7e70", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45d83403-a24c-44b5-84ac-961449b4008f", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://cnn.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be133029", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://cnn.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75e9fd40-b354-4341-991e-863ef2e59db7", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://openai.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8d1a0ed", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://openai.com\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f13496e1f0da6a00d4d4ca3d446e4e2c1847a97e Mon Sep 17 00:00:00 2001 From: Rob Godbey Date: Fri, 24 Oct 2025 10:58:29 -0400 Subject: [PATCH 2/2] Still trying to add Lab2 to Community Contributions from Cursor --- community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb index a1204f7..bf777f6 100644 --- a/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb +++ b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb @@ -39,9 +39,7 @@ "\n", "OLLAMA_BASE_URL = \"http://localhost:11434/v1\"\n", "\n", - "ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')\n", - "\n", - "omodel = \"llama3.2\"\n" + "ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')\n" ] }, {