diff --git a/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb new file mode 100644 index 0000000..bf777f6 --- /dev/null +++ b/community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", + "metadata": {}, + "source": [ + "# Lab2: Local Open Source on My PC Project\n", + "## Summarize All Websites without Selenium Using Open Source Models\n", + "This builds on my app from yesterday using Jina (https://jina.ai/reader) to turn all websites into markdown before summarizing by an LLM. And it uses Ollama to store open source LLMs on my PC to run things locally (jina is not local, so to be totally local you might need to go back to Selenium to do JavaScript sites).\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b87cadb-d513-4303-baee-a37b6f938e4d", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup access to the Ollama models\n", + "\n", + "OLLAMA_BASE_URL = \"http://localhost:11434/v1\"\n", + "\n", + "ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1-a: Define the user prompt\n", + "\n", + "user_prompt_prefix = \"\"\"\n", + "Here are the contents of a website.\n", + "Provide a short summary of this website.\n", + "If it includes news or announcements, then summarize these too.\n", + "Make recommendations for improvement\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abdb8417-c5dc-44bc-9bee-2e059d162699", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1-b: Define the system prompt\n", + "\n", + "system_prompt = \"\"\"You are to act like a smart Mckinsey Consultant specializing in website analysis. \n", + "1) You should provide a short, clear, summary, ignoring text that might be navigation related.\n", + "2) Follow the summary by making recommendations for improving the website so it is better at serving its purpose.\n", + "3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n", + "4) If possible try to group you recommendations, for example Grammar and Style, Clarity, Functional, etc.\n", + "5) Give confidence scores with every recommendation.\n", + "6) Always provide a summary of the website, explaining what it is.\n", + "7) if you do not understand the website's purpose or have no improvement recommendations, give out an error message along the lines of more data required for analysis or ask a follow up question.\n", + "8) Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88", + "metadata": {}, + "outputs": [], + "source": [ + "# Add the website content to the user prompt\n", + "\n", + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 5: Change the content utility to use jina\n", + "\n", + "def fetch_url_content(url):\n", + " jina_reader_url = f\"https://r.jina.ai/{url}\"\n", + " try:\n", + " response = requests.get(jina_reader_url)\n", + " response.raise_for_status() # Raise an exception for HTTP errors\n", + " return response.text\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"Error fetching URL: {e}\")\n", + " return None\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Call Ollama model & Step 4: print the result\n", + "\n", + "def summarize(url):\n", + " website = fetch_url_content(url)\n", + " response = ollama.chat.completions.create(\n", + " model = omodel,\n", + " messages = messages_for(website)\n", + " )\n", + " summary = response.choices[0].message.content\n", + " return display(Markdown(summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75df7e70", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45d83403-a24c-44b5-84ac-961449b4008f", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://cnn.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be133029", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://cnn.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75e9fd40-b354-4341-991e-863ef2e59db7", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"llama3.2\"\n", + "summarize(\"https://openai.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8d1a0ed", + "metadata": {}, + "outputs": [], + "source": [ + "omodel = \"deepseek-r1:1.5b\"\n", + "summarize(\"https://openai.com\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}