Merge pull request #491 from Zhufeng-Qiu/zephyr-week1-week2
Added my contributions to community-contributions of week1/week2
This commit is contained in:
271
week1/community-contributions/day1-dotabuff-summarization.ipynb
Normal file
271
week1/community-contributions/day1-dotabuff-summarization.ipynb
Normal file
@@ -0,0 +1,271 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "032a76d2-a112-4c49-bd32-fe6c87f6ec19",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dota Game Assistant\n",
|
||||
"\n",
|
||||
"This script retrieves and summarizes information about a specified hero from `dotabuff.com` website"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "04b24159-55d1-4eaf-bc19-474cec71cc3b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install selenium\n",
|
||||
"!pip install webdriver-manager"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14d26510-6613-4c1a-a346-159d906d111c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f9c8ea1e-8881-4f50-953d-ca7f462d8a32",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "02febcac-9a21-4322-b2ea-748972312165",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bb7dd822-962e-4b34-a743-c14809764e4a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"\n",
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.chrome.service import Service\n",
|
||||
"from selenium.webdriver.chrome.options import Options\n",
|
||||
"from selenium.webdriver.common.by import By\n",
|
||||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||||
"from webdriver_manager.chrome import ChromeDriverManager\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" def __init__(self, url, wait_time=10):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given URL using Selenium and BeautifulSoup.\n",
|
||||
" Uses headless Chrome to load JavaScript content.\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
"\n",
|
||||
" # Configure headless Chrome\n",
|
||||
" options = Options()\n",
|
||||
" options.headless = True\n",
|
||||
" options.add_argument(\"--disable-gpu\")\n",
|
||||
" options.add_argument(\"--no-sandbox\")\n",
|
||||
"\n",
|
||||
" # Start the driver\n",
|
||||
" service = Service(ChromeDriverManager().install())\n",
|
||||
" driver = webdriver.Chrome(service=service, options=options)\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" driver.get(url)\n",
|
||||
"\n",
|
||||
" # Wait until body is loaded (you can tweak the wait condition)\n",
|
||||
" WebDriverWait(driver, wait_time).until(\n",
|
||||
" EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" html = driver.page_source\n",
|
||||
" soup = BeautifulSoup(html, \"html.parser\")\n",
|
||||
"\n",
|
||||
" self.title = soup.title.string.strip() if soup.title else \"No title found\"\n",
|
||||
"\n",
|
||||
" # Remove unwanted tags\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
"\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
"\n",
|
||||
" finally:\n",
|
||||
" driver.quit()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d833fbb-0115-4d99-a4e9-464f27900eab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class DotaWebsite:\n",
|
||||
" def __init__(self, hero):\n",
|
||||
" web = Website(\"https://www.dotabuff.com/heroes\" + \"/\" + hero)\n",
|
||||
" self.title = web.title\n",
|
||||
" self.text = web.text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a0a42c2b-c837-4d1b-b8f8-b2dbb8592a1a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"You are an game assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary about facet selection, ability building, item building, best versus and worst versus, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7c05843d-6373-4a76-8cca-9c716a6ca13a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of provides a short summary about facet selection, ability building, item building, best versus and worst versus in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0145eee1-39e2-4f00-89ec-7acc6e375972",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "76f389c0-572a-476b-9b4e-719c0ef10abb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API. You will get very familiar with this!\n",
|
||||
"\n",
|
||||
"def summarize(hero):\n",
|
||||
" website = DotaWebsite(hero)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fcb046b7-52a9-49ff-b7bc-d8f6c279df4c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(hero):\n",
|
||||
" summary = summarize(hero)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9befb685-2912-41a9-b2d9-ae33001494c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"axe\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf1bb1d9-0351-44fc-8ebf-91aa47a81b42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
459
week1/community-contributions/day2-EXERCISE-ollama-local.ipynb
Normal file
459
week1/community-contributions/day2-EXERCISE-ollama-local.ipynb
Normal file
@@ -0,0 +1,459 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Welcome to your first assignment!\n",
|
||||
"\n",
|
||||
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#f71;\">Just before we get to the assignment --</h2>\n",
|
||||
" <span style=\"color:#f71;\">I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.<br/>\n",
|
||||
" <a href=\"https://edwarddonner.com/2024/11/13/llm-engineering-resources/\">https://edwarddonner.com/2024/11/13/llm-engineering-resources/</a><br/>\n",
|
||||
" Please keep this bookmarked, and I'll continue to add more useful links there over time.\n",
|
||||
" </span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HOMEWORK EXERCISE ASSIGNMENT\n",
|
||||
"\n",
|
||||
"Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n",
|
||||
"\n",
|
||||
"You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n",
|
||||
"\n",
|
||||
"**Benefits:**\n",
|
||||
"1. No API charges - open-source\n",
|
||||
"2. Data doesn't leave your box\n",
|
||||
"\n",
|
||||
"**Disadvantages:**\n",
|
||||
"1. Significantly less power than Frontier Model\n",
|
||||
"\n",
|
||||
"## Recap on installation of Ollama\n",
|
||||
"\n",
|
||||
"Simply visit [ollama.com](https://ollama.com) and install!\n",
|
||||
"\n",
|
||||
"Once complete, the ollama server should already be running locally. \n",
|
||||
"If you visit: \n",
|
||||
"[http://localhost:11434/](http://localhost:11434/)\n",
|
||||
"\n",
|
||||
"You should see the message `Ollama is running`. \n",
|
||||
"\n",
|
||||
"If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n",
|
||||
"And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n",
|
||||
"Then try [http://localhost:11434/](http://localhost:11434/) again.\n",
|
||||
"\n",
|
||||
"If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Constants\n",
|
||||
"\n",
|
||||
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
||||
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a messages list using the same format that we used for OpenAI\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"payload = {\n",
|
||||
" \"model\": MODEL,\n",
|
||||
" \"messages\": messages,\n",
|
||||
" \"stream\": False\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's just make sure the model is loaded\n",
|
||||
"\n",
|
||||
"!ollama pull llama3.2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
|
||||
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
|
||||
"# And if none of that works - contact me!\n",
|
||||
"\n",
|
||||
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
|
||||
"print(response.json()['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Introducing the ollama package\n",
|
||||
"\n",
|
||||
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
|
||||
"\n",
|
||||
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import ollama\n",
|
||||
"\n",
|
||||
"response = ollama.chat(model=MODEL, messages=messages)\n",
|
||||
"print(response['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Alternative approach - using OpenAI python library to connect to Ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# There's actually an alternative approach that some people might prefer\n",
|
||||
"# You can use the OpenAI client python library to call Ollama:\n",
|
||||
"\n",
|
||||
"from openai import OpenAI\n",
|
||||
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
||||
"\n",
|
||||
"response = ollama_via_openai.chat.completions.create(\n",
|
||||
" model=MODEL,\n",
|
||||
" messages=messages\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Are you confused about why that works?\n",
|
||||
"\n",
|
||||
"It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n",
|
||||
"\n",
|
||||
"Here's the scoop:\n",
|
||||
"\n",
|
||||
"The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n",
|
||||
"\n",
|
||||
"When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n",
|
||||
"\n",
|
||||
"Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n",
|
||||
"\n",
|
||||
"OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n",
|
||||
"\n",
|
||||
"So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n",
|
||||
"And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n",
|
||||
"\n",
|
||||
"And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n",
|
||||
"\n",
|
||||
"That's it!\n",
|
||||
"\n",
|
||||
"So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n",
|
||||
"Then this will make the same endpoint calls, but to Ollama instead of OpenAI."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Also trying the amazing reasoning model DeepSeek\n",
|
||||
"\n",
|
||||
"Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. \n",
|
||||
"This is actually a 1.5B variant of Qwen that has been fine-tuned using synethic data generated by Deepseek R1.\n",
|
||||
"\n",
|
||||
"Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!ollama pull deepseek-r1:1.5b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1d3d554b-e00d-4c08-9300-45e073950a76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside <think> tags, followed by some decent definitions\n",
|
||||
"\n",
|
||||
"response = ollama_via_openai.chat.completions.create(\n",
|
||||
" model=\"deepseek-r1:1.5b\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NOW the exercise for you\n",
|
||||
"\n",
|
||||
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bd2aea1-d7d7-499f-b704-5b13e2ddd23f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6df3141a-0a46-4ff9-ae73-bf8bee2aa3d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" \"\"\"\n",
|
||||
" A utility class to represent a Website that we have scraped\n",
|
||||
" \"\"\"\n",
|
||||
" url: str\n",
|
||||
" title: str\n",
|
||||
" text: str\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df2ea48b-7343-47be-bdcb-52b63a4de43e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80f1a534-ae2a-4283-83cf-5e7c5765c736",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"The contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5dfe658d-e3f9-4b32-90e6-1a523f47f836",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e2a09d0-bc47-490e-b085-fe3ccfbd16ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the Ollama function instead of OpenAI\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" response = ollama.chat(model=MODEL, messages=messages)\n",
|
||||
" return response['message']['content']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "340e08a2-86f0-4cdd-9188-da2972cae7a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "55e4790a-013c-40cf-9dff-bb5ec1d53964",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://zhufqiu.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8a96cbad-1306-4ce1-a942-2448f50d6751",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,202 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# End of week 1 exercise\n",
|
||||
"\n",
|
||||
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
|
||||
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1070317-3ed9-4659-abe3-828943230e03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# constants\n",
|
||||
"\n",
|
||||
"MODEL_GPT = 'gpt-4o-mini'\n",
|
||||
"MODEL_LLAMA = 'llama3.2'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n",
|
||||
"\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# here is the question; type over this to ask something new\n",
|
||||
"\n",
|
||||
"question = \"\"\"\n",
|
||||
"Please explain what this code does and why:\n",
|
||||
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1f879b7e-5ecc-4ec6-b269-78b6e2ed3480",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are a helpful tutor who answers technical questions about programming code(especially python code), software engineering, data science and LLMs\"\n",
|
||||
"user_prompt = \"Please give a detailed explanation to the following question: \" + question"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ac74ae5-af61-4a5d-b991-554fa67cd3d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get gpt-4o-mini to answer, with streaming\n",
|
||||
"stream = openai.chat.completions.create(\n",
|
||||
" model=MODEL_GPT,\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
"response = \"\"\n",
|
||||
"display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
"for chunk in stream:\n",
|
||||
" response += chunk.choices[0].delta.content or ''\n",
|
||||
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(response), display_id=display_handle.display_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get Llama 3.2 to answer\n",
|
||||
"\n",
|
||||
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
||||
"HEADERS = {\"Content-Type\": \"application/json\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4bd10d96-ee72-4c86-acd8-4fa417c25960",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!ollama pull llama3.2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d889d514-0478-4d7f-aabf-9a7bc743adb1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"stream = ollama.chat(model=MODEL_LLAMA, messages=messages, stream=True)\n",
|
||||
"\n",
|
||||
"response = \"\"\n",
|
||||
"display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
"for chunk in stream:\n",
|
||||
" response += chunk.get(\"message\", {}).get(\"content\", \"\")\n",
|
||||
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(response), display_id=display_handle.display_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "452d442a-f3b0-42ad-89d2-a8dc664e8bb6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user