From 9da9692a9b7b8db12a66913e4355a20fe18c565a Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 20:42:50 +0300 Subject: [PATCH 01/14] Week 1 exercise --- week1/Guide to Jupyter.ipynb | 155 +++++++++++--- .../01_webpage_summarizer.ipynb | 80 ++++++-- week1/day1.ipynb | 94 ++++++++- week1/day2 EXERCISE.ipynb | 4 +- week1/day5.ipynb | 71 ++++++- week1/troubleshooting.ipynb | 189 ++++++++++++++++-- week1/week1 EXERCISE.ipynb | 135 ++++++++++++- 7 files changed, 649 insertions(+), 79 deletions(-) diff --git a/week1/Guide to Jupyter.ipynb b/week1/Guide to Jupyter.ipynb index ebcc9f0..0de6a9f 100644 --- a/week1/Guide to Jupyter.ipynb +++ b/week1/Guide to Jupyter.ipynb @@ -32,10 +32,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "33d37cd8-55c9-4e03-868c-34aa9cab2c80", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Click anywhere in this cell and press Shift + Return\n", "\n", @@ -54,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "585eb9c1-85ee-4c27-8dc2-b4d8d022eda0", "metadata": {}, "outputs": [], @@ -66,10 +77,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "07792faa-761d-46cb-b9b7-2bbf70bb1628", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'bananas'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The result of the last statement is shown after you run it\n", "\n", @@ -78,10 +100,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a067d2b1-53d5-4aeb-8a3c-574d39ff654a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is bananas\n" + ] + } + ], "source": [ "# Use the variable\n", "\n", @@ -90,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "4c5a4e60-b7f4-4953-9e80-6d84ba4664ad", "metadata": {}, "outputs": [], @@ -116,10 +146,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "8e5ec81d-7c5b-4025-bd2e-468d67b581b6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is anything but bananas\n" + ] + } + ], "source": [ "# Then run this cell twice, and see if you understand what's going on\n", "\n", @@ -144,10 +182,18 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", + "execution_count": 7, + "id": "ce258424-40c3-49a7-9462-e6fa25014b03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is apples\n" + ] + } + ], "source": [ "print(f\"My favorite fruit is {favorite_fruit}\")" ] @@ -165,10 +211,12 @@ { "cell_type": "code", "execution_count": null, - "id": "ce258424-40c3-49a7-9462-e6fa25014b03", + "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "print(f\"My favorite fruit is {favorite_fruit} - ok\")" + ] }, { "cell_type": "markdown", @@ -221,10 +269,25 @@ "Click in the cell and press the Bin icon if you want to remove it." 
] }, + { + "cell_type": "markdown", + "id": "b3b2d1ff-5d2c-47a9-9c1b-90a0cfb89dd9", + "metadata": {}, + "source": [ + "# This is a heading\n", + "## This is a sub-head\n", + "### And a sub-sub-head\n", + "\n", + "I like Jupyter Lab because it's\n", + "- Easy\n", + "- Flexible\n", + "- Satisfying" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "e1586320-c90f-4f22-8b39-df6865484950", + "id": "a365b651-3a34-40ed-8def-df1e6a484b43", "metadata": {}, "outputs": [], "source": [] @@ -245,10 +308,21 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "82042fc5-a907-4381-a4b8-eb9386df19cd", + "execution_count": 1, + "id": "b1b303d9-ce47-4cee-85e9-6416abca7d21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Guide to Jupyter.ipynb day2 EXERCISE.ipynb troubleshooting.ipynb\n", + "Intermediate Python.ipynb day5.ipynb week1 EXERCISE.ipynb\n", + "\u001b[34mcommunity-contributions\u001b[m\u001b[m diagnostics.py\n", + "day1.ipynb \u001b[34msolutions\u001b[m\u001b[m\n" + ] + } + ], "source": [ "# list the current directory\n", "\n", @@ -258,13 +332,13 @@ { "cell_type": "code", "execution_count": null, - "id": "4fc3e3da-8a55-40cc-9706-48bf12a0e20e", + "id": "18685382-3768-4e00-817b-cc69dd1fb531", "metadata": {}, "outputs": [], "source": [ - "# ping cnn.com - press the stop button in the toolbar when you're bored\n", + "# ping a website\n", "\n", - "!ping cnn.com" + "!ping google.com" ] }, { @@ -295,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2646a4e5-3c23-4aee-a34d-d623815187d2", "metadata": {}, "outputs": [], @@ -313,10 +387,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "6e96be3d-fa82-42a3-a8aa-b81dd20563a5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████| 1000/1000 [00:11<00:00, 86.00it/s]\n" + ] + } + ], "source": [ "# And now, with a nice little progress bar:\n", "\n", @@ -331,10 +413,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "63c788dd-4618-4bb4-a5ce-204411a38ade", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "# This is a big heading!\n", + "\n", + "- And this is a bullet-point\n", + "- So is this\n", + "- Me, too!" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# On a different topic, here's a useful way to print output in markdown\n", "\n", @@ -372,7 +471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/week1/community-contributions/01_webpage_summarizer.ipynb b/week1/community-contributions/01_webpage_summarizer.ipynb index f8be204..8126396 100644 --- a/week1/community-contributions/01_webpage_summarizer.ipynb +++ b/week1/community-contributions/01_webpage_summarizer.ipynb @@ -42,17 +42,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "ebf2fa36", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting selenium\n", + " Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)\n", + "Collecting webdriver-manager\n", + " Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: urllib3~=2.5.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from urllib3[socks]~=2.5.0->selenium) (2.5.0)\n", + "Collecting trio~=0.30.0 (from selenium)\n", + " Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting trio-websocket~=0.12.2 (from selenium)\n", + " Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)\n", + "Requirement already satisfied: certifi>=2025.6.15 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (2025.7.14)\n", + "Requirement already satisfied: typing_extensions~=4.14.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (4.14.1)\n", + "Requirement already satisfied: websocket-client~=1.8.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n", + "Requirement already satisfied: attrs>=23.2.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (25.3.0)\n", + "Collecting sortedcontainers (from trio~=0.30.0->selenium)\n", + " Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: idna in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (3.10)\n", + "Collecting outcome (from trio~=0.30.0->selenium)\n", + " Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)\n", + "Requirement already satisfied: sniffio>=1.3.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (1.3.1)\n", + "Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)\n", + " Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]~=2.5.0->selenium)\n", + " Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: requests in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.4)\n", + "Requirement already satisfied: python-dotenv in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.1)\n", + "Requirement already satisfied: packaging in 
/Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (25.0)\n", + "Requirement already satisfied: h11<1,>=0.9.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.12.2->selenium) (0.16.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.2)\n", + "Downloading selenium-4.34.2-py3-none-any.whl (9.4 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0mm \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading trio-0.30.0-py3-none-any.whl (499 kB)\n", + "Downloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)\n", + "Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)\n", + "Downloading outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)\n", + "Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n", + "Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n", + "Installing collected packages: sortedcontainers, wsproto, pysocks, outcome, webdriver-manager, trio, trio-websocket, selenium\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [selenium]━━\u001b[0m \u001b[32m7/8\u001b[0m [selenium]-manager]\n", + "\u001b[1A\u001b[2KSuccessfully installed outcome-1.3.0.post0 pysocks-1.7.1 selenium-4.34.2 sortedcontainers-2.4.0 trio-0.30.0 trio-websocket-0.12.2 webdriver-manager-4.0.2 wsproto-1.2.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install selenium webdriver-manager" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "1dcf1d9d-c540-4900-b14e-ad36a28fc822", "metadata": {}, "outputs": [], @@ -92,10 +140,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "8598c299-05ca-492e-b085-6bcc2f7dda0d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ API key loaded successfully!\n" + ] + } + ], "source": [ "load_dotenv(override=True)\n", "api_key = os.getenv('OPENAI_API_KEY')\n", @@ -109,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "8098defb", "metadata": {}, "outputs": [], @@ -128,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c6fe5114", "metadata": {}, "outputs": [], @@ -233,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "02e3a673-a8a1-4101-a441-3816f7ab9e4d", "metadata": {}, "outputs": [], @@ -245,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "86bb80f9-9e7c-4825-985f-9b83fe50839f", "metadata": {}, "outputs": [], @@ -259,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "89998b18-77aa-4aaf-a137-f0d078d61f75", "metadata": {}, "outputs": [], @@ -335,7 +391,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -349,7 +405,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - 
"version": "3.12.9" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day1.ipynb b/week1/day1.ipynb index f492110..414c638 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -497,6 +497,26 @@ "display_summary(\"https://anthropic.com\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a904323-acd9-4c8e-9a17-70df76184590", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://rwothoromo.wordpress.com/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://openai.com\")" + ] + }, { "cell_type": "markdown", "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", @@ -538,23 +558,55 @@ "source": [ "# Step 1: Create your prompts\n", "\n", - "system_prompt = \"something here\"\n", + "system_prompt = \"You are a professional assistant\"\n", "user_prompt = \"\"\"\n", - " Lots of text\n", - " Can be pasted here\n", + "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", + "\n", + "Dear Dev Contact,\n", + "\n", + "I hope this message finds you well.\n", + "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", + "\n", + "If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n", + "\n", + "Thank you very much for your time and consideration.\n", + "\n", + "Hello Job Seeker,\n", + "\n", + "I hope you are doing well.\n", + "\n", + "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", + "\n", + "Hello Dev Contact \n", + "Thank you for your feedback.\n", + "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", + "\n", + "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", + "\n", + "Hello Dev Contact\n", + "\n", + "Thanks for the update. 
When you hear of any opportunity please let me know.\n", + "\n", + "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", "\"\"\"\n", "\n", "# Step 2: Make the messages list\n", "\n", - "messages = [] # fill this in\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + "] # fill this in\n", "\n", "# Step 3: Call OpenAI\n", "\n", - "response =\n", + "response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages\n", + ")\n", "\n", "# Step 4: print the result\n", "\n", - "print(" + "print(response.choices[0].message.content)" ] }, { @@ -588,6 +640,34 @@ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", "metadata": {}, "outputs": [], + "source": [ + "# To perform summaries using a model running locally\n", + "import ollama\n", + "\n", + "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n", + "# HEADERS = {\"Content-Type\": \"application/json\"}\n", + "MODEL = \"llama3.2\"\n", + "\n", + "\n", + "def summarize_with_local_model(url):\n", + " website = Website(url)\n", + " messages = messages_for(website)\n", + " response = ollama.chat(\n", + " model=MODEL,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", + "metadata": {}, + "outputs": [], "source": [] } ], @@ -607,7 +687,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day2 EXERCISE.ipynb b/week1/day2 EXERCISE.ipynb index 89a383f..cde9d4a 100644 --- a/week1/day2 EXERCISE.ipynb +++ b/week1/day2 EXERCISE.ipynb @@ -118,7 +118,7 @@ "payload = {\n", " \"model\": MODEL,\n", " \"messages\": messages,\n", - " \"stream\": False\n", + " \"stream\": False # just get the results, don't stream them\n", " }" ] }, @@ -308,7 +308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day5.ipynb b/week1/day5.ipynb index 5249ce8..39142ef 100644 --- a/week1/day5.ipynb +++ b/week1/day5.ipynb @@ -144,6 +144,15 @@ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", " ]\n", "}\n", + "\"\"\"\n", + "link_system_prompt += \"And this example:\"\n", + "link_system_prompt += \"\"\"\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n", + " {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n", + " ]\n", + "}\n", "\"\"\"" ] }, @@ -213,6 +222,9 @@ "source": [ "# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n", "\n", + "# anthropic = Website(\"https://anthropic.com\")\n", + "# anthropic.links\n", + "# get_links(\"https://anthropic.com\")\n", "huggingface = Website(\"https://huggingface.co\")\n", "huggingface.links" ] @@ -272,15 +284,15 @@ "metadata": {}, "outputs": [], "source": [ - "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "and creates a short brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", - "Include details of company culture, customers and careers/jobs if you have the information.\"\n", + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n", "\n", "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", "\n", - "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "# Include details of company culture, customers and careers/jobs if you have the information.\"\n" + "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information.\"\n" ] }, { @@ -293,6 +305,7 @@ "def get_brochure_user_prompt(company_name, url):\n", " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", + " user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n", " user_prompt += get_all_details(url)\n", " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", " return user_prompt" @@ -324,6 +337,28 @@ " ],\n", " )\n", " result = response.choices[0].message.content\n", + " # display(Markdown(result))\n", + " # print(result)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0029e063-0c07-4712-82d9-536ec3579e80", + "metadata": {}, + "outputs": [], + "source": [ + "def translate_brochure(brochure, language):\n", + " system_prompt_for_language = \"You're an expert in \" + language + \". 
Translate the brochure!\"\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt_for_language},\n", + " {\"role\": \"user\", \"content\": brochure}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", " display(Markdown(result))" ] }, @@ -337,6 +372,28 @@ "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510", + "metadata": {}, + "outputs": [], + "source": [ + "brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n", + "# translate_brochure(brochure, \"Spanish\")\n", + "translate_brochure(brochure, \"Swahili\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34e03db6-61d0-4fc5-bf66-4f679b9befde", + "metadata": {}, + "outputs": [], + "source": [ + "create_brochure(\"Wabeh\", \"https://wabeh.com/\")" + ] + }, { "cell_type": "markdown", "id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18", @@ -501,7 +558,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/troubleshooting.ipynb b/week1/troubleshooting.ipynb index 23eca6f..d8cdf8f 100644 --- a/week1/troubleshooting.ipynb +++ b/week1/troubleshooting.ipynb @@ -40,10 +40,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d296f9b6-8de4-44db-b5f5-9b653dfd3d81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to the internet and can reach Google\n" + ] + } + ], "source": [ "import urllib.request\n", "\n", @@ -101,10 +109,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7c8c0bb3-0e94-466e-8d1a-4dfbaa014cbe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Virtualenv is active:\n", + "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", + "Environment Name: llms\n" + ] + } + ], "source": [ "# Some quick checks that your Conda environment or VirtualEnv is as expected\n", "# The Environment Name should be: llms\n", @@ -164,10 +182,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "6c78b7d9-1eea-412d-8751-3de20c0f6e2f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'openai'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# This import should work if your environment is active and dependencies are installed!\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" + ] + } + ], "source": [ "# This import should work if your environment is active and dependencies are installed!\n", "\n", @@ -201,10 +231,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": 
"caa4837e-b970-4f89-aa9a-8aa793c754fd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".env file found.\n", + "SUCCESS! OPENAI_API_KEY found and it has the right prefix\n" + ] + } + ], "source": [ "from pathlib import Path\n", "\n", @@ -254,10 +293,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "ab9ea6ef-49ee-4899-a1c7-75a8bd9ac36b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is already a .env file - if you want me to create a new one, change the variable overwrite_if_already_exists to True above\n" + ] + } + ], "source": [ "# Only run this code in this cell if you want to have a .env file created for you!\n", "\n", @@ -371,10 +418,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "cccb58e7-6626-4033-9dc1-e7e3ff742f6b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'openai'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mdotenv\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_dotenv\n\u001b[1;32m 3\u001b[0m load_dotenv(override\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" + ] + } + ], "source": [ "from openai import OpenAI\n", "from dotenv import load_dotenv\n", @@ -463,10 +522,110 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "248204f0-7bad-482a-b715-fb06a3553916", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting diagnostics at 2025-08-02 23:16:47\n", + "\n", + "===== System Information =====\n", + "Operating System: Darwin\n", + "MacOS Version: 10.16\n", + "Architecture: ('64bit', 'Mach-O')\n", + "Machine: x86_64\n", + "Processor: i386\n", + "Total RAM: 16.00 GB\n", + "Available RAM: 6.04 GB\n", + "Free Disk Space: 315.94 GB\n", + "\n", + "===== File System Information =====\n", + "Current Directory: /Users/elijahsmac/Desktop/code/llm/llm_engineering/week1\n", + "Write permission: OK\n", + "\n", + "Files in Current Directory:\n", + " - .ipynb_checkpoints\n", + " - Guide to Jupyter.ipynb\n", + " - Intermediate Python.ipynb\n", + " - __pycache__\n", + " - community-contributions\n", + " - day1.ipynb\n", + " - day2 EXERCISE.ipynb\n", + " - day5.ipynb\n", + " - diagnostics.py\n", + " - report.txt\n", + " - solutions\n", + " - troubleshooting.ipynb\n", + " - week1 EXERCISE.ipynb\n", + "\n", + "===== Git Repository Information =====\n", + "Git Repository Root: /Users/elijahsmac/Desktop/code/llm/llm_engineering\n", + "Current Commit: 3a042500d7db3c0e8cde89f836a60e6cd7ab9ba1\n", + "Remote Origin: git@github.com:ed-donner/llm_engineering.git\n", + "\n", + "===== Environment File Check =====\n", + ".env file exists at: 
/Users/elijahsmac/Desktop/code/llm/llm_engineering/.env\n", + "OPENAI_API_KEY found in .env file\n", + "\n", + "===== Anaconda Environment Check =====\n", + "No active Anaconda environment detected\n", + "\n", + "===== Virtualenv Check =====\n", + "Virtualenv is active:\n", + "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", + "Environment Name: llms\n", + "\n", + "Python Environment:\n", + "Python Version: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 11:09:21) [Clang 14.0.6 ]\n", + "Python Executable: /Users/elijahsmac/anaconda3/bin/python\n", + "\n", + "Required Package Versions:\n", + "ERROR: Required package 'openai' is not installed\n", + "python-dotenv: 1.1.0\n", + "requests: 2.32.3\n", + "ERROR: Required package 'gradio' is not installed\n", + "ERROR: Required package 'transformers' is not installed\n", + "\n", + "===== Network Connectivity Check =====\n", + "SSL Version: OpenSSL 3.0.16 11 Feb 2025\n", + "ERROR: Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", + "\n", + "===== Environment Variables Check =====\n", + "\n", + "PYTHONPATH is not set.\n", + "\n", + "Python sys.path:\n", + " - /Users/elijahsmac/anaconda3/lib/python313.zip\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/lib-dynload\n", + " - \n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/aeosa\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/setuptools/_vendor\n", + "OPENAI_API_KEY is set after calling load_dotenv()\n", + "\n", + "===== Additional Diagnostics =====\n", + "Temp directory is writable: /var/folders/_1/d6xg9lvd4lb714ry78xpc10w0000gn/T\n", + "\n", + "===== Errors Found =====\n", + "The following critical issues were detected. Please address them before proceeding:\n", + "- Required package 'openai' is not installed\n", + "- Required package 'gradio' is not installed\n", + "- Required package 'transformers' is not installed\n", + "- Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", + "\n", + "\n", + "Completed diagnostics at 2025-08-02 23:16:47\n", + "\n", + "\n", + "Please send these diagnostics to me at ed@edwarddonner.com\n", + "Either copy & paste the above output into an email, or attach the file report.txt that has been created in this directory.\n" + ] + } + ], "source": [ "# Run my diagnostics report to collect key information for debugging\n", "# Please email me the results. 
Either copy & paste the output, or attach the file report.txt\n", @@ -501,7 +660,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index f3486fe..2f79587 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -18,7 +18,13 @@ "metadata": {}, "outputs": [], "source": [ - "# imports" + "# imports\n", + "import os, re, requests, json, ollama\n", + "from typing import List\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" ] }, { @@ -41,7 +47,27 @@ "metadata": {}, "outputs": [], "source": [ - "# set up environment" + "# set up environment\n", + "\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " \"\"\"\n", + " Create this Website object from the given url using the BeautifulSoup library\n", + " \"\"\"\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + "\n", + "openai = OpenAI()\n" ] }, { @@ -53,10 +79,68 @@ "source": [ "# here is the question; type over this to ask something new\n", "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# \"\"\"\n", + "\n", "question = \"\"\"\n", - "Please explain what this code does and why:\n", - "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "\"\"\"" + "How good at Software Development is Elijah Rwothoromo? \\\n", + "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", + "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. 
\\\n", + "What can we learn from him?\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", + "metadata": {}, + "outputs": [], + "source": [ + "# Process URLs in the question to improve the prompt\n", + "\n", + "# Extract all URLs from the question string using regular expressions\n", + "urls = re.findall(r'https?://[^\\s)]+', question)\n", + "\n", + "# Fetch the content for each URL using the Website class\n", + "scraped_content = []\n", + "for url in urls:\n", + " print(f\"Scraping: {url}\")\n", + " try:\n", + " site = Website(url)\n", + " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", + " scraped_content.append(content)\n", + " except Exception as e:\n", + " print(f\"Could not scrape {url}: {e}\")\n", + " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + "\n", + "# Combine all the scraped text into one string\n", + "all_scraped_text = \"\\n\".join(scraped_content)\n", + "\n", + "# Update the question with the scraped content\n", + "augmented_question = f\"\"\"\n", + "Based on the following information, please answer the user's original question.\n", + "\n", + "--- TEXT FROM WEBSITES ---\n", + "{all_scraped_text}\n", + "--- END TEXT FROM WEBSITES ---\n", + "\n", + "--- ORIGINAL QUESTION ---\n", + "{question}\n", + "\"\"\"\n", + "\n", + "# system prompt to be more accurate for AI to just analyze the provided text.\n", + "system_prompt = \"You are an expert assistant. \\\n", + "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", + "Provides a short summary, ignoring text that might be navigation-related.\"\n", + "\n", + "# Create the messages list with the new augmented prompt\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": augmented_question},\n", + "]\n" ] }, { @@ -66,7 +150,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Get gpt-4o-mini to answer, with streaming" + "# Get gpt-4o-mini to answer, with streaming\n", + "\n", + "def get_gpt_response(question):\n", + " # return response.choices[0].message.content\n", + "\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL_GPT,\n", + " messages=messages,\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " response += chunk.choices[0].delta.content or ''\n", + " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response), display_id=display_handle.display_id)\n", + "\n", + "get_gpt_response(question)" ] }, { @@ -76,8 +178,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Get Llama 3.2 to answer" + "# Get Llama 3.2 to answer\n", + "def get_llama_response(question):\n", + " response = ollama.chat(\n", + " model=MODEL_LLAMA,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(get_llama_response(question)))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -96,7 +215,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.7" } }, "nbformat": 4, 
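Note on the week1 EXERCISE changes in the patch above: together they add a small retrieval-style flow — pull any URLs out of the question with a regex, scrape each one with the Website helper, fold the scraped text into an augmented prompt, and send the result to a chat model. A condensed, self-contained sketch of that flow follows; MODEL_GPT is assumed to be "gpt-4o-mini" (the constant is referenced but not defined in the visible hunks), and an OPENAI_API_KEY is assumed to be available in the environment.

    import re
    import requests
    from bs4 import BeautifulSoup
    from openai import OpenAI

    MODEL_GPT = "gpt-4o-mini"  # assumed value; the patch calls this constant without showing its definition
    HEADERS = {"User-Agent": "Mozilla/5.0"}

    def scrape(url):
        # Fetch a page and return its visible text, mirroring the Website class in the patch
        soup = BeautifulSoup(requests.get(url, headers=HEADERS).content, "html.parser")
        for tag in soup.body(["script", "style", "img", "input"]):
            tag.decompose()
        return soup.body.get_text(separator="\n", strip=True)

    def answer(question):
        # Extract URLs from the question and fold their scraped text into the prompt
        urls = re.findall(r"https?://[^\s)]+", question)
        scraped = "\n".join(f"Content from {u}:\n---\n{scrape(u)}\n---" for u in urls)
        augmented = f"Answer using this website text.\n{scraped}\n\nOriginal question:\n{question}"
        response = OpenAI().chat.completions.create(
            model=MODEL_GPT,
            messages=[
                {"role": "system", "content": "You are an expert assistant. Respond in markdown."},
                {"role": "user", "content": augmented},
            ],
        )
        return response.choices[0].message.content

In the notebook itself the same call is made with stream=True and rendered incrementally via update_display; the non-streaming form above keeps the sketch short.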
From df046d178030398799bcecf7335a44867a17c825 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:22:27 +0300 Subject: [PATCH 02/14] Added my contributions to community-contributions --- week1/week1 EXERCISE.ipynb | 77 ++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index 2f79587..2cbd323 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -19,9 +19,8 @@ "outputs": [], "source": [ "# imports\n", - "import os, re, requests, json, ollama\n", - "from typing import List\n", - "from dotenv import load_dotenv\n", + "\n", + "import re, requests, ollama\n", "from bs4 import BeautifulSoup\n", "from IPython.display import Markdown, display, update_display\n", "from openai import OpenAI" @@ -84,6 +83,12 @@ "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", "# \"\"\"\n", "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# Popular dev site https://projecteuler.net/\n", + "# \"\"\"\n", + "\n", "question = \"\"\"\n", "How good at Software Development is Elijah Rwothoromo? \\\n", "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", @@ -103,43 +108,50 @@ "\n", "# Extract all URLs from the question string using regular expressions\n", "urls = re.findall(r'https?://[^\\s)]+', question)\n", + "# print(urls)\n", "\n", - "# Fetch the content for each URL using the Website class\n", - "scraped_content = []\n", - "for url in urls:\n", - " print(f\"Scraping: {url}\")\n", - " try:\n", - " site = Website(url)\n", - " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", - " scraped_content.append(content)\n", - " except Exception as e:\n", - " print(f\"Could not scrape {url}: {e}\")\n", - " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + "if len(urls) > 0:\n", + " \n", + " # Fetch the content for each URL using the Website class\n", + " scraped_content = []\n", + " for url in urls:\n", + " print(f\"Scraping: {url}\")\n", + " try:\n", + " site = Website(url)\n", + " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", + " scraped_content.append(content)\n", + " except Exception as e:\n", + " print(f\"Could not scrape {url}: {e}\")\n", + " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + " \n", + " # Combine all the scraped text into one string\n", + " all_scraped_text = \"\\n\".join(scraped_content)\n", + " \n", + " # Update the question with the scraped content\n", + " updated_question = f\"\"\"\n", + " Based on the following information, please answer the user's original question.\n", + " \n", + " --- TEXT FROM WEBSITES ---\n", + " {all_scraped_text}\n", + " --- END TEXT FROM WEBSITES ---\n", + " \n", + " --- ORIGINAL QUESTION ---\n", + " {question}\n", + " \"\"\"\n", + "else:\n", + " updated_question = question\n", "\n", - "# Combine all the scraped text into one string\n", - "all_scraped_text = \"\\n\".join(scraped_content)\n", - "\n", - "# Update the question with the scraped content\n", - "augmented_question = f\"\"\"\n", - "Based on the following information, please answer the user's original question.\n", - "\n", - "--- TEXT FROM WEBSITES ---\n", - "{all_scraped_text}\n", - "--- END TEXT FROM WEBSITES ---\n", - "\n", - "--- ORIGINAL QUESTION ---\n", - "{question}\n", - 
"\"\"\"\n", + "# print(updated_question)\n", "\n", "# system prompt to be more accurate for AI to just analyze the provided text.\n", "system_prompt = \"You are an expert assistant. \\\n", "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", - "Provides a short summary, ignoring text that might be navigation-related.\"\n", + "Provide a short summary, ignoring text that might be navigation-related.\"\n", "\n", - "# Create the messages list with the new augmented prompt\n", + "# Create the messages list with the newly updated prompt\n", "messages = [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": augmented_question},\n", + " {\"role\": \"user\", \"content\": updated_question},\n", "]\n" ] }, @@ -153,8 +165,6 @@ "# Get gpt-4o-mini to answer, with streaming\n", "\n", "def get_gpt_response(question):\n", - " # return response.choices[0].message.content\n", - "\n", " stream = openai.chat.completions.create(\n", " model=MODEL_GPT,\n", " messages=messages,\n", @@ -179,6 +189,7 @@ "outputs": [], "source": [ "# Get Llama 3.2 to answer\n", + "\n", "def get_llama_response(question):\n", " response = ollama.chat(\n", " model=MODEL_LLAMA,\n", From a9572a9557a725aec282a5d078b8254d5404d9df Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:33:35 +0300 Subject: [PATCH 03/14] Undo changes in default files --- week1/Guide to Jupyter.ipynb | 155 ++++++---------------------- week1/troubleshooting.ipynb | 189 +++-------------------------------- week1/week1 EXERCISE.ipynb | 1 + 3 files changed, 44 insertions(+), 301 deletions(-) diff --git a/week1/Guide to Jupyter.ipynb b/week1/Guide to Jupyter.ipynb index 0de6a9f..ebcc9f0 100644 --- a/week1/Guide to Jupyter.ipynb +++ b/week1/Guide to Jupyter.ipynb @@ -32,21 +32,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "33d37cd8-55c9-4e03-868c-34aa9cab2c80", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Click anywhere in this cell and press Shift + Return\n", "\n", @@ -65,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "585eb9c1-85ee-4c27-8dc2-b4d8d022eda0", "metadata": {}, "outputs": [], @@ -77,21 +66,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "07792faa-761d-46cb-b9b7-2bbf70bb1628", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'bananas'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# The result of the last statement is shown after you run it\n", "\n", @@ -100,18 +78,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a067d2b1-53d5-4aeb-8a3c-574d39ff654a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "My favorite fruit is bananas\n" - ] - } - ], + "outputs": [], "source": [ "# Use the variable\n", "\n", @@ -120,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "4c5a4e60-b7f4-4953-9e80-6d84ba4664ad", "metadata": {}, "outputs": [], @@ -146,18 +116,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "8e5ec81d-7c5b-4025-bd2e-468d67b581b6", "metadata": {}, - "outputs": [ - { 
- "name": "stdout", - "output_type": "stream", - "text": [ - "My favorite fruit is anything but bananas\n" - ] - } - ], + "outputs": [], "source": [ "# Then run this cell twice, and see if you understand what's going on\n", "\n", @@ -182,18 +144,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "ce258424-40c3-49a7-9462-e6fa25014b03", + "execution_count": null, + "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "My favorite fruit is apples\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"My favorite fruit is {favorite_fruit}\")" ] @@ -211,12 +165,10 @@ { "cell_type": "code", "execution_count": null, - "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", + "id": "ce258424-40c3-49a7-9462-e6fa25014b03", "metadata": {}, "outputs": [], - "source": [ - "print(f\"My favorite fruit is {favorite_fruit} - ok\")" - ] + "source": [] }, { "cell_type": "markdown", @@ -269,25 +221,10 @@ "Click in the cell and press the Bin icon if you want to remove it." ] }, - { - "cell_type": "markdown", - "id": "b3b2d1ff-5d2c-47a9-9c1b-90a0cfb89dd9", - "metadata": {}, - "source": [ - "# This is a heading\n", - "## This is a sub-head\n", - "### And a sub-sub-head\n", - "\n", - "I like Jupyter Lab because it's\n", - "- Easy\n", - "- Flexible\n", - "- Satisfying" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "a365b651-3a34-40ed-8def-df1e6a484b43", + "id": "e1586320-c90f-4f22-8b39-df6865484950", "metadata": {}, "outputs": [], "source": [] @@ -308,21 +245,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "b1b303d9-ce47-4cee-85e9-6416abca7d21", + "execution_count": null, + "id": "82042fc5-a907-4381-a4b8-eb9386df19cd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Guide to Jupyter.ipynb day2 EXERCISE.ipynb troubleshooting.ipynb\n", - "Intermediate Python.ipynb day5.ipynb week1 EXERCISE.ipynb\n", - "\u001b[34mcommunity-contributions\u001b[m\u001b[m diagnostics.py\n", - "day1.ipynb \u001b[34msolutions\u001b[m\u001b[m\n" - ] - } - ], + "outputs": [], "source": [ "# list the current directory\n", "\n", @@ -332,13 +258,13 @@ { "cell_type": "code", "execution_count": null, - "id": "18685382-3768-4e00-817b-cc69dd1fb531", + "id": "4fc3e3da-8a55-40cc-9706-48bf12a0e20e", "metadata": {}, "outputs": [], "source": [ - "# ping a website\n", + "# ping cnn.com - press the stop button in the toolbar when you're bored\n", "\n", - "!ping google.com" + "!ping cnn.com" ] }, { @@ -369,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "2646a4e5-3c23-4aee-a34d-d623815187d2", "metadata": {}, "outputs": [], @@ -387,18 +313,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "6e96be3d-fa82-42a3-a8aa-b81dd20563a5", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|███████████████████████████████████████| 1000/1000 [00:11<00:00, 86.00it/s]\n" - ] - } - ], + "outputs": [], "source": [ "# And now, with a nice little progress bar:\n", "\n", @@ -413,27 +331,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "63c788dd-4618-4bb4-a5ce-204411a38ade", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "# This is a big heading!\n", - "\n", - "- And this is a bullet-point\n", - "- So is this\n", - "- Me, too!" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# On a different topic, here's a useful way to print output in markdown\n", "\n", @@ -471,7 +372,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.11.11" } }, "nbformat": 4, diff --git a/week1/troubleshooting.ipynb b/week1/troubleshooting.ipynb index d8cdf8f..23eca6f 100644 --- a/week1/troubleshooting.ipynb +++ b/week1/troubleshooting.ipynb @@ -40,18 +40,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "d296f9b6-8de4-44db-b5f5-9b653dfd3d81", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connected to the internet and can reach Google\n" - ] - } - ], + "outputs": [], "source": [ "import urllib.request\n", "\n", @@ -109,20 +101,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7c8c0bb3-0e94-466e-8d1a-4dfbaa014cbe", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Virtualenv is active:\n", - "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", - "Environment Name: llms\n" - ] - } - ], + "outputs": [], "source": [ "# Some quick checks that your Conda environment or VirtualEnv is as expected\n", "# The Environment Name should be: llms\n", @@ -182,22 +164,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "6c78b7d9-1eea-412d-8751-3de20c0f6e2f", "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'openai'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# This import should work if your environment is active and dependencies are installed!\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" - ] - } - ], + "outputs": [], "source": [ "# This import should work if your environment is active and dependencies are installed!\n", "\n", @@ -231,19 +201,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "caa4837e-b970-4f89-aa9a-8aa793c754fd", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ".env file found.\n", - "SUCCESS! 
OPENAI_API_KEY found and it has the right prefix\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -293,18 +254,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ab9ea6ef-49ee-4899-a1c7-75a8bd9ac36b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There is already a .env file - if you want me to create a new one, change the variable overwrite_if_already_exists to True above\n" - ] - } - ], + "outputs": [], "source": [ "# Only run this code in this cell if you want to have a .env file created for you!\n", "\n", @@ -418,22 +371,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "cccb58e7-6626-4033-9dc1-e7e3ff742f6b", "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'openai'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mdotenv\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_dotenv\n\u001b[1;32m 3\u001b[0m load_dotenv(override\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" - ] - } - ], + "outputs": [], "source": [ "from openai import OpenAI\n", "from dotenv import load_dotenv\n", @@ -522,110 +463,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "248204f0-7bad-482a-b715-fb06a3553916", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting diagnostics at 2025-08-02 23:16:47\n", - "\n", - "===== System Information =====\n", - "Operating System: Darwin\n", - "MacOS Version: 10.16\n", - "Architecture: ('64bit', 'Mach-O')\n", - "Machine: x86_64\n", - "Processor: i386\n", - "Total RAM: 16.00 GB\n", - "Available RAM: 6.04 GB\n", - "Free Disk Space: 315.94 GB\n", - "\n", - "===== File System Information =====\n", - "Current Directory: /Users/elijahsmac/Desktop/code/llm/llm_engineering/week1\n", - "Write permission: OK\n", - "\n", - "Files in Current Directory:\n", - " - .ipynb_checkpoints\n", - " - Guide to Jupyter.ipynb\n", - " - Intermediate Python.ipynb\n", - " - __pycache__\n", - " - community-contributions\n", - " - day1.ipynb\n", - " - day2 EXERCISE.ipynb\n", - " - day5.ipynb\n", - " - diagnostics.py\n", - " - report.txt\n", - " - solutions\n", - " - troubleshooting.ipynb\n", - " - week1 EXERCISE.ipynb\n", - "\n", - "===== Git Repository Information =====\n", - "Git Repository Root: /Users/elijahsmac/Desktop/code/llm/llm_engineering\n", - "Current Commit: 3a042500d7db3c0e8cde89f836a60e6cd7ab9ba1\n", - "Remote Origin: git@github.com:ed-donner/llm_engineering.git\n", - "\n", - "===== Environment File Check =====\n", - ".env file exists at: /Users/elijahsmac/Desktop/code/llm/llm_engineering/.env\n", - "OPENAI_API_KEY found in .env file\n", - "\n", - "===== Anaconda Environment Check =====\n", - "No active Anaconda environment detected\n", - "\n", - "===== 
Virtualenv Check =====\n", - "Virtualenv is active:\n", - "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", - "Environment Name: llms\n", - "\n", - "Python Environment:\n", - "Python Version: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 11:09:21) [Clang 14.0.6 ]\n", - "Python Executable: /Users/elijahsmac/anaconda3/bin/python\n", - "\n", - "Required Package Versions:\n", - "ERROR: Required package 'openai' is not installed\n", - "python-dotenv: 1.1.0\n", - "requests: 2.32.3\n", - "ERROR: Required package 'gradio' is not installed\n", - "ERROR: Required package 'transformers' is not installed\n", - "\n", - "===== Network Connectivity Check =====\n", - "SSL Version: OpenSSL 3.0.16 11 Feb 2025\n", - "ERROR: Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", - "\n", - "===== Environment Variables Check =====\n", - "\n", - "PYTHONPATH is not set.\n", - "\n", - "Python sys.path:\n", - " - /Users/elijahsmac/anaconda3/lib/python313.zip\n", - " - /Users/elijahsmac/anaconda3/lib/python3.13\n", - " - /Users/elijahsmac/anaconda3/lib/python3.13/lib-dynload\n", - " - \n", - " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages\n", - " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/aeosa\n", - " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/setuptools/_vendor\n", - "OPENAI_API_KEY is set after calling load_dotenv()\n", - "\n", - "===== Additional Diagnostics =====\n", - "Temp directory is writable: /var/folders/_1/d6xg9lvd4lb714ry78xpc10w0000gn/T\n", - "\n", - "===== Errors Found =====\n", - "The following critical issues were detected. Please address them before proceeding:\n", - "- Required package 'openai' is not installed\n", - "- Required package 'gradio' is not installed\n", - "- Required package 'transformers' is not installed\n", - "- Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", - "\n", - "\n", - "Completed diagnostics at 2025-08-02 23:16:47\n", - "\n", - "\n", - "Please send these diagnostics to me at ed@edwarddonner.com\n", - "Either copy & paste the above output into an email, or attach the file report.txt that has been created in this directory.\n" - ] - } - ], + "outputs": [], "source": [ "# Run my diagnostics report to collect key information for debugging\n", "# Please email me the results. 
Either copy & paste the output, or attach the file report.txt\n", @@ -660,7 +501,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.11" } }, "nbformat": 4, diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index 2cbd323..7e80d75 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -19,6 +19,7 @@ "outputs": [], "source": [ "# imports\n", + "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", "\n", "import re, requests, ollama\n", "from bs4 import BeautifulSoup\n", From ae7d307063cf4679c7c9b2061cf3ce767d9a8ec9 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:44:53 +0300 Subject: [PATCH 04/14] Add changes to community folder --- .../rwothoromo/week1/day1.ipynb | 695 ++++++++++++++++++ .../rwothoromo/week1/day2 EXERCISE.ipynb | 316 ++++++++ .../rwothoromo/week1/day5.ipynb | 566 ++++++++++++++ 3 files changed, 1577 insertions(+) create mode 100644 week1/community-contributions/rwothoromo/week1/day1.ipynb create mode 100644 week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb create mode 100644 week1/community-contributions/rwothoromo/week1/day5.ipynb diff --git a/week1/community-contributions/rwothoromo/week1/day1.ipynb b/week1/community-contributions/rwothoromo/week1/day1.ipynb new file mode 100644 index 0000000..414c638 --- /dev/null +++ b/week1/community-contributions/rwothoromo/week1/day1.ipynb @@ -0,0 +1,695 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", + "metadata": {}, + "source": [ + "# YOUR FIRST LAB\n", + "### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n", + "\n", + "## Your first Frontier LLM Project\n", + "\n", + "Let's build a useful LLM solution - in a matter of minutes.\n", + "\n", + "By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n", + "\n", + "Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n", + "\n", + "Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n", + "\n", + "## If you're new to Jupyter Lab\n", + "\n", + "Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n", + "\n", + "I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n", + "\n", + "## If you're new to the Command Line\n", + "\n", + "Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). 
\n", + "\n", + "## If you'd prefer to work in IDEs\n", + "\n", + "If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n", + "If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n", + "\n", + "## If you'd like to brush up your Python\n", + "\n", + "I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n", + "`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n", + "\n", + "## I am here to help\n", + "\n", + "If you have any problems at all, please do reach out. \n", + "I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n", + "And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n", + "\n", + "## More troubleshooting\n", + "\n", + "Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n", + "\n", + "## For foundational technical knowledge (eg Git, APIs, debugging) \n", + "\n", + "If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n", + "\n", + "This covers Git and GitHub; what they are, the difference, and how to use them: \n", + "https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n", + "\n", + "This covers technical foundations: \n", + "ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n", + "https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n", + "\n", + "This covers Python for beginners, and making sure that a `NameError` never trips you up: \n", + "https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n", + "\n", + "This covers the essential techniques for figuring out errors: \n", + "https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n", + "\n", + "And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n", + "\n", + "## If this is old hat!\n", + "\n", + "If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Please read - important note

\n", + " The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, after watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

This code is a live resource - keep an eye out for my emails

\n", + " I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.

\n", + " I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n", + "
\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Business value of these exercises

\n", + " A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n", + "\n", + "# If you get an error running this cell, then please head over to the troubleshooting notebook!" + ] + }, + { + "cell_type": "markdown", + "id": "6900b2a8-6384-4316-8aaa-5e519fca4254", + "metadata": {}, + "source": [ + "# Connecting to OpenAI (or Ollama)\n", + "\n", + "The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n", + "\n", + "If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n", + "\n", + "## Troubleshooting if you have problems:\n", + "\n", + "Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n", + "\n", + "If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n", + "\n", + "Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n", + "\n", + "Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b87cadb-d513-4303-baee-a37b6f938e4d", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "\n", + "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n", + "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions" + ] + }, + { + "cell_type": "markdown", + "id": "442fc84b-0815-4f40-99ab-d9a5da6bda91", + "metadata": {}, + "source": [ + "# Let's make a quick call to a Frontier model to get started, as a preview!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a58394bf-1e45-46af-9bfd-01e24da6f49a", + "metadata": {}, + "outputs": [], + "source": [ + "# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n", + "\n", + "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n", + "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "2aa190e5-cb31-456a-96cc-db109919cd78", + "metadata": {}, + "source": [ + "## OK onwards with our first project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e793b2-6775-426a-a139-4848291d0463", + "metadata": {}, + "outputs": [], + "source": [ + "# A class to represent a Webpage\n", + "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n", + "\n", + "# Some websites need you to use proper headers when fetching them:\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " \"\"\"\n", + " Create this Website object from the given url using the BeautifulSoup library\n", + " \"\"\"\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's try one out. Change the website and add print statements to follow along.\n", + "\n", + "ed = Website(\"https://edwarddonner.com\")\n", + "print(ed.title)\n", + "print(ed.text)" + ] + }, + { + "cell_type": "markdown", + "id": "6a478a0c-2c53-48ff-869c-4d08199931e1", + "metadata": {}, + "source": [ + "## Types of prompts\n", + "\n", + "You may know this already - but if not, you will get very familiar with it!\n", + "\n", + "Models like GPT4o have been trained to receive instructions in a particular way.\n", + "\n", + "They expect to receive:\n", + "\n", + "**A system prompt** that tells them what task they are performing and what tone they should use\n", + "\n", + "**A user prompt** -- the conversation starter that they should reply to" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abdb8417-c5dc-44bc-9bee-2e059d162699", + "metadata": {}, + "outputs": [], + "source": [ + "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n", + "\n", + "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n", + "and provides a short summary, ignoring text that might be navigation related. 
\\\n", + "Respond in markdown.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c", + "metadata": {}, + "outputs": [], + "source": [ + "# A function that writes a User Prompt that asks for summaries of websites:\n", + "\n", + "def user_prompt_for(website):\n", + " user_prompt = f\"You are looking at a website titled {website.title}\"\n", + " user_prompt += \"\\nThe contents of this website is as follows; \\\n", + "please provide a short summary of this website in markdown. \\\n", + "If it includes news or announcements, then summarize these too.\\n\\n\"\n", + " user_prompt += website.text\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26448ec4-5c00-4204-baec-7df91d11ff2e", + "metadata": {}, + "outputs": [], + "source": [ + "print(user_prompt_for(ed))" + ] + }, + { + "cell_type": "markdown", + "id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc", + "metadata": {}, + "source": [ + "## Messages\n", + "\n", + "The API from OpenAI expects to receive messages in a particular structure.\n", + "Many of the other APIs share this structure:\n", + "\n", + "```python\n", + "[\n", + " {\"role\": \"system\", \"content\": \"system message goes here\"},\n", + " {\"role\": \"user\", \"content\": \"user message goes here\"}\n", + "]\n", + "```\n", + "To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n", + " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21ed95c5-7001-47de-a36d-1d6673b403ce", + "metadata": {}, + "outputs": [], + "source": [ + "# To give you a preview -- calling OpenAI with system and user messages:\n", + "\n", + "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47", + "metadata": {}, + "source": [ + "## And now let's build useful messages for GPT-4o-mini, using a function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88", + "metadata": {}, + "outputs": [], + "source": [ + "# See how this function creates exactly the format above\n", + "\n", + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36478464-39ee-485c-9f3f-6a4e458dbc9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Try this out, and then try for a few more websites\n", + "\n", + "messages_for(ed)" + ] + }, + { + "cell_type": "markdown", + "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0", + "metadata": {}, + "source": [ + "## Time to bring it together - the API for OpenAI is very simple!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34", + "metadata": {}, + "outputs": [], + "source": [ + "# And now: call the OpenAI API. 
You will get very familiar with this!\n", + "\n", + "def summarize(url):\n", + " website = Website(url)\n", + " response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5", + "metadata": {}, + "outputs": [], + "source": [ + "summarize(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d926d59-450e-4609-92ba-2d6f244f1342", + "metadata": {}, + "outputs": [], + "source": [ + "# A function to display this nicely in the Jupyter output, using markdown\n", + "\n", + "def display_summary(url):\n", + " summary = summarize(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3018853a-445f-41ff-9560-d925d1774b2f", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "markdown", + "id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624", + "metadata": {}, + "source": [ + "# Let's try more websites\n", + "\n", + "Note that this will only work on websites that can be scraped using this simplistic approach.\n", + "\n", + "Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n", + "\n", + "Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n", + "\n", + "But many websites will work just fine!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45d83403-a24c-44b5-84ac-961449b4008f", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://cnn.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75e9fd40-b354-4341-991e-863ef2e59db7", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://anthropic.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a904323-acd9-4c8e-9a17-70df76184590", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://rwothoromo.wordpress.com/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://openai.com\")" + ] + }, + { + "cell_type": "markdown", + "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
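(Referring back to the JavaScript/Selenium note above: here is a minimal, hypothetical sketch of that idea - not the course implementation. It assumes you have run `pip install selenium` and have Chrome installed; Selenium 4 can usually fetch a matching driver for you. The function name is made up.)

```python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

def fetch_rendered_text(url):
    # A real browser runs the page's JavaScript before we read the HTML
    options = Options()
    options.add_argument("--headless=new")   # no visible browser window
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()
    return soup.get_text(separator="\n", strip=True)
```

You could then swap something like this into the Website class in place of the `requests.get` call for sites that need JavaScript.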
\n", + " \n", + " \n", + "

Business applications

\n", + " In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n", + "\n", + "More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Before you continue - now try yourself

\n", + " Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00743dac-0e70-45b7-879a-d7293a6f68a6", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Create your prompts\n", + "\n", + "system_prompt = \"You are a professional assistant\"\n", + "user_prompt = \"\"\"\n", + "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", + "\n", + "Dear Dev Contact,\n", + "\n", + "I hope this message finds you well.\n", + "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", + "\n", + "If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n", + "\n", + "Thank you very much for your time and consideration.\n", + "\n", + "Hello Job Seeker,\n", + "\n", + "I hope you are doing well.\n", + "\n", + "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", + "\n", + "Hello Dev Contact \n", + "Thank you for your feedback.\n", + "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", + "\n", + "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", + "\n", + "Hello Dev Contact\n", + "\n", + "Thanks for the update. When you hear of any opportunity please let me know.\n", + "\n", + "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", + "\"\"\"\n", + "\n", + "# Step 2: Make the messages list\n", + "\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + "] # fill this in\n", + "\n", + "# Step 3: Call OpenAI\n", + "\n", + "response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages\n", + ")\n", + "\n", + "# Step 4: print the result\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "36ed9f14-b349-40e9-a42c-b367e77f8bda", + "metadata": {}, + "source": [ + "## An extra exercise for those who enjoy web scraping\n", + "\n", + "You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)" + ] + }, + { + "cell_type": "markdown", + "id": "eeab24dc-5f90-4570-b542-b0585aca3eb6", + "metadata": {}, + "source": [ + "# Sharing your code\n", + "\n", + "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n", + "\n", + "If you're not an expert with git (and I am not!) 
then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n", + "\n", + "Here are good instructions courtesy of an AI friend: \n", + "https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", + "metadata": {}, + "outputs": [], + "source": [ + "# To perform summaries using a model running locally\n", + "import ollama\n", + "\n", + "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n", + "# HEADERS = {\"Content-Type\": \"application/json\"}\n", + "MODEL = \"llama3.2\"\n", + "\n", + "\n", + "def summarize_with_local_model(url):\n", + " website = Website(url)\n", + " messages = messages_for(website)\n", + " response = ollama.chat(\n", + " model=MODEL,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb new file mode 100644 index 0000000..cde9d4a --- /dev/null +++ b/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb @@ -0,0 +1,316 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", + "metadata": {}, + "source": [ + "# Welcome to your first assignment!\n", + "\n", + "Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)" + ] + }, + { + "cell_type": "markdown", + "id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Just before we get to the assignment --

\n", + " I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.
\n", + " https://edwarddonner.com/2024/11/13/llm-engineering-resources/
\n", + " Please keep this bookmarked, and I'll continue to add more useful links there over time.\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458", + "metadata": {}, + "source": [ + "# HOMEWORK EXERCISE ASSIGNMENT\n", + "\n", + "Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n", + "\n", + "You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n", + "\n", + "**Benefits:**\n", + "1. No API charges - open-source\n", + "2. Data doesn't leave your box\n", + "\n", + "**Disadvantages:**\n", + "1. Significantly less power than Frontier Model\n", + "\n", + "## Recap on installation of Ollama\n", + "\n", + "Simply visit [ollama.com](https://ollama.com) and install!\n", + "\n", + "Once complete, the ollama server should already be running locally. \n", + "If you visit: \n", + "[http://localhost:11434/](http://localhost:11434/)\n", + "\n", + "You should see the message `Ollama is running`. \n", + "\n", + "If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n", + "And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n", + "Then try [http://localhost:11434/](http://localhost:11434/) again.\n", + "\n", + "If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29ddd15d-a3c5-4f4e-a678-873f56162724", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "\n", + "OLLAMA_API = \"http://localhost:11434/api/chat\"\n", + "HEADERS = {\"Content-Type\": \"application/json\"}\n", + "MODEL = \"llama3.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dac0a679-599c-441f-9bf2-ddc73d35b940", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a messages list using the same format that we used for OpenAI\n", + "\n", + "messages = [\n", + " {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7bb9c624-14f0-4945-a719-8ddb64f66f47", + "metadata": {}, + "outputs": [], + "source": [ + "payload = {\n", + " \"model\": MODEL,\n", + " \"messages\": messages,\n", + " \"stream\": False # just get the results, don't stream them\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "479ff514-e8bd-4985-a572-2ea28bb4fa40", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's just make sure the model is loaded\n", + "\n", + "!ollama pull llama3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42b9f644-522d-4e05-a691-56e7658c0ea9", + "metadata": {}, + "outputs": [], + "source": [ + "# If this doesn't work for any reason, try the 2 versions in the following cells\n", + "# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n", + "# And if none of that works - contact me!\n", + "\n", + "response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n", + 
"print(response.json()['message']['content'])" + ] + }, + { + "cell_type": "markdown", + "id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe", + "metadata": {}, + "source": [ + "# Introducing the ollama package\n", + "\n", + "And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n", + "\n", + "Under the hood, it's making the same call as above to the ollama server running at localhost:11434" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7745b9c4-57dc-4867-9180-61fa5db55eb8", + "metadata": {}, + "outputs": [], + "source": [ + "import ollama\n", + "\n", + "response = ollama.chat(model=MODEL, messages=messages)\n", + "print(response['message']['content'])" + ] + }, + { + "cell_type": "markdown", + "id": "a4704e10-f5fb-4c15-a935-f046c06fb13d", + "metadata": {}, + "source": [ + "## Alternative approach - using OpenAI python library to connect to Ollama" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23057e00-b6fc-4678-93a9-6b31cb704bff", + "metadata": {}, + "outputs": [], + "source": [ + "# There's actually an alternative approach that some people might prefer\n", + "# You can use the OpenAI client python library to call Ollama:\n", + "\n", + "from openai import OpenAI\n", + "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "\n", + "response = ollama_via_openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=messages\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44", + "metadata": {}, + "source": [ + "## Are you confused about why that works?\n", + "\n", + "It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n", + "\n", + "Here's the scoop:\n", + "\n", + "The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n", + "\n", + "When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n", + "\n", + "Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n", + "\n", + "OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n", + "\n", + "So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n", + "And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n", + "\n", + "And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n", + "\n", + "That's it!\n", + "\n", + "So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n", + "Then this will make the same endpoint calls, but to Ollama instead of OpenAI." + ] + }, + { + "cell_type": "markdown", + "id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90", + "metadata": {}, + "source": [ + "## Also trying the amazing reasoning model DeepSeek\n", + "\n", + "Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. 
\n", + "This is actually a 1.5B variant of Qwen that has been fine-tuned using synethic data generated by Deepseek R1.\n", + "\n", + "Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d", + "metadata": {}, + "outputs": [], + "source": [ + "!ollama pull deepseek-r1:1.5b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d3d554b-e00d-4c08-9300-45e073950a76", + "metadata": {}, + "outputs": [], + "source": [ + "# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside tags, followed by some decent definitions\n", + "\n", + "response = ollama_via_openai.chat.completions.create(\n", + " model=\"deepseek-r1:1.5b\",\n", + " messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898", + "metadata": {}, + "source": [ + "# NOW the exercise for you\n", + "\n", + "Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/rwothoromo/week1/day5.ipynb b/week1/community-contributions/rwothoromo/week1/day5.ipynb new file mode 100644 index 0000000..39142ef --- /dev/null +++ b/week1/community-contributions/rwothoromo/week1/day5.ipynb @@ -0,0 +1,566 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a98030af-fcd1-4d63-a36e-38ba053498fa", + "metadata": {}, + "source": [ + "# A full business solution\n", + "\n", + "## Now we will take our project from Day 1 to the next level\n", + "\n", + "### BUSINESS CHALLENGE:\n", + "\n", + "Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.\n", + "\n", + "We will be provided a company name and their primary website.\n", + "\n", + "See the end of this notebook for examples of real-world business applications.\n", + "\n", + "And remember: I'm always available if you have problems or ideas! Please do reach out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5b08506-dc8b-4443-9201-5f1848161363", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n", + "\n", + "import os\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc5d8880-f2ee-4c06-af16-ecbc0262af61", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize and constants\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n", + " print(\"API key looks good so far\")\n", + "else:\n", + " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n", + " \n", + "MODEL = 'gpt-4o-mini'\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "106dd65e-90af-4ca8-86b6-23a41840645b", + "metadata": {}, + "outputs": [], + "source": [ + "# A class to represent a Webpage\n", + "\n", + "# Some websites need you to use proper headers when fetching them:\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " \"\"\"\n", + " A utility class to represent a Website that we have scraped, now with links\n", + " \"\"\"\n", + "\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e30d8128-933b-44cc-81c8-ab4c9d86589a", + "metadata": {}, + "outputs": [], + "source": [ + "ed = Website(\"https://edwarddonner.com\")\n", + "ed.links" + ] + }, + { + "cell_type": "markdown", + "id": "1771af9c-717a-4fca-bbbe-8a95893312c3", + "metadata": {}, + "source": [ + "## First step: Have GPT-4o-mini figure out which links are relevant\n", + "\n", + "### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON. \n", + "It should decide which links are relevant, and replace relative links such as \"/about\" with \"https://company.com/about\". \n", + "We will use \"one shot prompting\" in which we provide an example of how it should respond in the prompt.\n", + "\n", + "This is an excellent use case for an LLM, because it requires nuanced understanding. 
Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!\n", + "\n", + "Sidenote: there is a more advanced technique called \"Structured Outputs\" in which we require the model to respond according to a spec. We cover this technique in Week 8 during our autonomous Agentic AI project." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6957b079-0d96-45f7-a26a-3487510e9b35", + "metadata": {}, + "outputs": [], + "source": [ + "link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n", + "You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n", + "such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n", + "link_system_prompt += \"You should respond in JSON as in this example:\"\n", + "link_system_prompt += \"\"\"\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n", + " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", + " ]\n", + "}\n", + "\"\"\"\n", + "link_system_prompt += \"And this example:\"\n", + "link_system_prompt += \"\"\"\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n", + " {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n", + " ]\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b97e4068-97ed-4120-beae-c42105e4d59a", + "metadata": {}, + "outputs": [], + "source": [ + "print(link_system_prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e1f601b-2eaf-499d-b6b8-c99050c9d6b3", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links_user_prompt(website):\n", + " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n", + " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. 
\\\n", + "Do not include Terms of Service, Privacy, email links.\\n\"\n", + " user_prompt += \"Links (some might be relative links):\\n\"\n", + " user_prompt += \"\\n\".join(website.links)\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bcbfa78-6395-4685-b92c-22d592050fd7", + "metadata": {}, + "outputs": [], + "source": [ + "print(get_links_user_prompt(ed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a29aca19-ca13-471c-a4b4-5abbfa813f69", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links(url):\n", + " website = Website(url)\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": link_system_prompt},\n", + " {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n", + " ],\n", + " response_format={\"type\": \"json_object\"}\n", + " )\n", + " result = response.choices[0].message.content\n", + " return json.loads(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74a827a0-2782-4ae5-b210-4a242a8b4cc2", + "metadata": {}, + "outputs": [], + "source": [ + "# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n", + "\n", + "# anthropic = Website(\"https://anthropic.com\")\n", + "# anthropic.links\n", + "# get_links(\"https://anthropic.com\")\n", + "huggingface = Website(\"https://huggingface.co\")\n", + "huggingface.links" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3d583e2-dcc4-40cc-9b28-1e8dbf402924", + "metadata": {}, + "outputs": [], + "source": [ + "get_links(\"https://huggingface.co\")" + ] + }, + { + "cell_type": "markdown", + "id": "0d74128e-dfb6-47ec-9549-288b621c838c", + "metadata": {}, + "source": [ + "## Second step: make the brochure!\n", + "\n", + "Assemble all the details into another prompt to GPT4-o" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85a5b6e2-e7ef-44a9-bc7f-59ede71037b5", + "metadata": {}, + "outputs": [], + "source": [ + "def get_all_details(url):\n", + " result = \"Landing page:\\n\"\n", + " result += Website(url).get_contents()\n", + " links = get_links(url)\n", + " print(\"Found links:\", links)\n", + " for link in links[\"links\"]:\n", + " result += f\"\\n\\n{link['type']}\\n\"\n", + " result += Website(link[\"url\"]).get_contents()\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5099bd14-076d-4745-baf3-dac08d8e5ab2", + "metadata": {}, + "outputs": [], + "source": [ + "print(get_all_details(\"https://huggingface.co\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b863a55-f86c-4e3f-8a79-94e24c1a8cf2", + "metadata": {}, + "outputs": [], + "source": [ + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n", + "\n", + "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", + "\n", + "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information.\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab83d92-d36b-4ce0-8bcc-5bb4c2f8ff23", + "metadata": {}, + "outputs": [], + "source": [ + "def get_brochure_user_prompt(company_name, url):\n", + " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", + " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", + " user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n", + " user_prompt += get_all_details(url)\n", + " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd909e0b-1312-4ce2-a553-821e795d7572", + "metadata": {}, + "outputs": [], + "source": [ + "get_brochure_user_prompt(\"HuggingFace\", \"https://huggingface.co\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e44de579-4a1a-4e6a-a510-20ea3e4b8d46", + "metadata": {}, + "outputs": [], + "source": [ + "def create_brochure(company_name, url):\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", + " # display(Markdown(result))\n", + " # print(result)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0029e063-0c07-4712-82d9-536ec3579e80", + "metadata": {}, + "outputs": [], + "source": [ + "def translate_brochure(brochure, language):\n", + " system_prompt_for_language = \"You're an expert in \" + language + \". 
Translate the brochure!\"\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt_for_language},\n", + " {\"role\": \"user\", \"content\": brochure}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", + " display(Markdown(result))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e093444a-9407-42ae-924a-145730591a39", + "metadata": {}, + "outputs": [], + "source": [ + "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510", + "metadata": {}, + "outputs": [], + "source": [ + "brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n", + "# translate_brochure(brochure, \"Spanish\")\n", + "translate_brochure(brochure, \"Swahili\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34e03db6-61d0-4fc5-bf66-4f679b9befde", + "metadata": {}, + "outputs": [], + "source": [ + "create_brochure(\"Wabeh\", \"https://wabeh.com/\")" + ] + }, + { + "cell_type": "markdown", + "id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18", + "metadata": {}, + "source": [ + "## Finally - a minor improvement\n", + "\n", + "With a small adjustment, we can change this so that the results stream back from OpenAI,\n", + "with the familiar typewriter animation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51db0e49-f261-4137-aabe-92dd601f7725", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_brochure(company_name, url):\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n", + " ],\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " response += chunk.choices[0].delta.content or ''\n", + " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response), display_id=display_handle.display_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56bf0ae3-ee9d-4a72-9cd6-edcac67ceb6d", + "metadata": {}, + "outputs": [], + "source": [ + "stream_brochure(\"HuggingFace\", \"https://huggingface.co\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdb3f8d8-a3eb-41c8-b1aa-9f60686a653b", + "metadata": {}, + "outputs": [], + "source": [ + "# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:\n", + "\n", + "stream_brochure(\"HuggingFace\", \"https://huggingface.co\")" + ] + }, + { + "cell_type": "markdown", + "id": "a27bf9e0-665f-4645-b66b-9725e2a959b5", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Business applications

\n", + " In this exercise we extended the Day 1 code to make multiple LLM calls, and generate a document.\n", + "\n", + "This is perhaps the first example of Agentic AI design patterns, as we combined multiple calls to LLMs. This will feature more in Week 2, and then we will return to Agentic AI in a big way in Week 8 when we build a fully autonomous Agent solution.\n", + "\n", + "Generating content in this way is one of the very most common Use Cases. As with summarization, this can be applied to any business vertical. Write marketing content, generate a product tutorial from a spec, create personalized email content, and so much more. Explore how you can apply content generation to your business, and try making yourself a proof-of-concept prototype. See what other students have done in the community-contributions folder -- so many valuable projects -- it's wild!\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "14b2454b-8ef8-4b5c-b928-053a15e0d553", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Before you move to Week 2 (which is tons of fun)

\n", + " Please see the week1 EXERCISE notebook for your challenge for the end of week 1. This will give you some essential practice working with Frontier APIs, and prepare you well for Week 2.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "17b64f0f-7d33-4493-985a-033d06e8db08", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

A reminder on 3 useful resources

\n", + " 1. The resources for the course are available here.
\n", + " 2. I'm on LinkedIn here and I love connecting with people taking the course!
\n", + " 3. I'm trying out X/Twitter and I'm at @edwarddonner and hoping people will teach me how it's done.. \n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "6f48e42e-fa7a-495f-a5d4-26bfc24d60b6", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "

Finally! I have a special request for you

\n", + " \n", + " My editor tells me that it makes a MASSIVE difference when students rate this course on Udemy - it's one of the main ways that Udemy decides whether to show it to others. If you're able to take a minute to rate this, I'd be so very grateful! And regardless - always please reach out to me at ed@edwarddonner.com if I can help at any point.\n", + " \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8d3e1a1-ba54-4907-97c5-30f89a24775b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c577b8675713ff5a89f60d88f32880496d124963 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:47:40 +0300 Subject: [PATCH 05/14] remove redundant changes --- .../rwothoromo/week1/week1 EXERCISE.ipynb | 235 ++++++++++++++++++ week1/day1.ipynb | 94 +------ week1/day5.ipynb | 71 +----- week1/week1 EXERCISE.ipynb | 147 +---------- 4 files changed, 257 insertions(+), 290 deletions(-) create mode 100644 week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb new file mode 100644 index 0000000..7e80d75 --- /dev/null +++ b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "metadata": {}, + "source": [ + "# End of week 1 exercise\n", + "\n", + "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n", + "and responds with an explanation. This is a tool that you will be able to use yourself during the course!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1070317-3ed9-4659-abe3-828943230e03", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", + "\n", + "import re, requests, ollama\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", + "metadata": {}, + "outputs": [], + "source": [ + "# constants\n", + "\n", + "MODEL_GPT = 'gpt-4o-mini'\n", + "MODEL_LLAMA = 'llama3.2'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", + "metadata": {}, + "outputs": [], + "source": [ + "# set up environment\n", + "\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " \"\"\"\n", + " Create this Website object from the given url using the BeautifulSoup library\n", + " \"\"\"\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + "\n", + "openai = OpenAI()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f0d0137-52b0-47a8-81a8-11a90a010798", + "metadata": {}, + "outputs": [], + "source": [ + "# here is the question; type over this to ask something new\n", + "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# \"\"\"\n", + "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# Popular dev site https://projecteuler.net/\n", + "# \"\"\"\n", + "\n", + "question = \"\"\"\n", + "How good at Software Development is Elijah Rwothoromo? \\\n", + "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", + "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. 
\\\n", + "What can we learn from him?\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", + "metadata": {}, + "outputs": [], + "source": [ + "# Process URLs in the question to improve the prompt\n", + "\n", + "# Extract all URLs from the question string using regular expressions\n", + "urls = re.findall(r'https?://[^\\s)]+', question)\n", + "# print(urls)\n", + "\n", + "if len(urls) > 0:\n", + " \n", + " # Fetch the content for each URL using the Website class\n", + " scraped_content = []\n", + " for url in urls:\n", + " print(f\"Scraping: {url}\")\n", + " try:\n", + " site = Website(url)\n", + " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", + " scraped_content.append(content)\n", + " except Exception as e:\n", + " print(f\"Could not scrape {url}: {e}\")\n", + " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + " \n", + " # Combine all the scraped text into one string\n", + " all_scraped_text = \"\\n\".join(scraped_content)\n", + " \n", + " # Update the question with the scraped content\n", + " updated_question = f\"\"\"\n", + " Based on the following information, please answer the user's original question.\n", + " \n", + " --- TEXT FROM WEBSITES ---\n", + " {all_scraped_text}\n", + " --- END TEXT FROM WEBSITES ---\n", + " \n", + " --- ORIGINAL QUESTION ---\n", + " {question}\n", + " \"\"\"\n", + "else:\n", + " updated_question = question\n", + "\n", + "# print(updated_question)\n", + "\n", + "# system prompt to be more accurate for AI to just analyze the provided text.\n", + "system_prompt = \"You are an expert assistant. \\\n", + "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", + "Provide a short summary, ignoring text that might be navigation-related.\"\n", + "\n", + "# Create the messages list with the newly updated prompt\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": updated_question},\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4", + "metadata": {}, + "outputs": [], + "source": [ + "# Get gpt-4o-mini to answer, with streaming\n", + "\n", + "def get_gpt_response(question):\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL_GPT,\n", + " messages=messages,\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " response += chunk.choices[0].delta.content or ''\n", + " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response), display_id=display_handle.display_id)\n", + "\n", + "get_gpt_response(question)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538", + "metadata": {}, + "outputs": [], + "source": [ + "# Get Llama 3.2 to answer\n", + "\n", + "def get_llama_response(question):\n", + " response = ollama.chat(\n", + " model=MODEL_LLAMA,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(get_llama_response(question)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", + "metadata": {}, + 
"outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/day1.ipynb b/week1/day1.ipynb index 414c638..f492110 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -497,26 +497,6 @@ "display_summary(\"https://anthropic.com\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a904323-acd9-4c8e-9a17-70df76184590", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://rwothoromo.wordpress.com/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://openai.com\")" - ] - }, { "cell_type": "markdown", "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", @@ -558,55 +538,23 @@ "source": [ "# Step 1: Create your prompts\n", "\n", - "system_prompt = \"You are a professional assistant\"\n", + "system_prompt = \"something here\"\n", "user_prompt = \"\"\"\n", - "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", - "\n", - "Dear Dev Contact,\n", - "\n", - "I hope this message finds you well.\n", - "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", - "\n", - "If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n", - "\n", - "Thank you very much for your time and consideration.\n", - "\n", - "Hello Job Seeker,\n", - "\n", - "I hope you are doing well.\n", - "\n", - "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", - "\n", - "Hello Dev Contact \n", - "Thank you for your feedback.\n", - "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", - "\n", - "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", - "\n", - "Hello Dev Contact\n", - "\n", - "Thanks for the update. 
When you hear of any opportunity please let me know.\n", - "\n", - "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", + " Lots of text\n", + " Can be pasted here\n", "\"\"\"\n", "\n", "# Step 2: Make the messages list\n", "\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": user_prompt},\n", - "] # fill this in\n", + "messages = [] # fill this in\n", "\n", "# Step 3: Call OpenAI\n", "\n", - "response = openai.chat.completions.create(\n", - " model = \"gpt-4o-mini\",\n", - " messages = messages\n", - ")\n", + "response =\n", "\n", "# Step 4: print the result\n", "\n", - "print(response.choices[0].message.content)" + "print(" ] }, { @@ -640,34 +588,6 @@ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", "metadata": {}, "outputs": [], - "source": [ - "# To perform summaries using a model running locally\n", - "import ollama\n", - "\n", - "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n", - "# HEADERS = {\"Content-Type\": \"application/json\"}\n", - "MODEL = \"llama3.2\"\n", - "\n", - "\n", - "def summarize_with_local_model(url):\n", - " website = Website(url)\n", - " messages = messages_for(website)\n", - " response = ollama.chat(\n", - " model=MODEL,\n", - " messages=messages,\n", - " stream=False # just get the results, don't stream them\n", - " )\n", - " return response['message']['content']\n", - "\n", - "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", - "metadata": {}, - "outputs": [], "source": [] } ], @@ -687,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/week1/day5.ipynb b/week1/day5.ipynb index 39142ef..5249ce8 100644 --- a/week1/day5.ipynb +++ b/week1/day5.ipynb @@ -144,15 +144,6 @@ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", " ]\n", "}\n", - "\"\"\"\n", - "link_system_prompt += \"And this example:\"\n", - "link_system_prompt += \"\"\"\n", - "{\n", - " \"links\": [\n", - " {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n", - " {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n", - " ]\n", - "}\n", "\"\"\"" ] }, @@ -222,9 +213,6 @@ "source": [ "# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n", "\n", - "# anthropic = Website(\"https://anthropic.com\")\n", - "# anthropic.links\n", - "# get_links(\"https://anthropic.com\")\n", "huggingface = Website(\"https://huggingface.co\")\n", "huggingface.links" ] @@ -284,15 +272,15 @@ "metadata": {}, "outputs": [], "source": [ - "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "# Include details of company culture, customers and careers/jobs if you have the information.\"\n", + "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "and creates a short brochure about the company for prospective customers, investors and recruits. 
Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information.\"\n", "\n", "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", "\n", - "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "Include details of company culture, customers and careers/jobs if you have the information.\"\n" + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n" ] }, { @@ -305,7 +293,6 @@ "def get_brochure_user_prompt(company_name, url):\n", " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", - " user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n", " user_prompt += get_all_details(url)\n", " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", " return user_prompt" @@ -337,28 +324,6 @@ " ],\n", " )\n", " result = response.choices[0].message.content\n", - " # display(Markdown(result))\n", - " # print(result)\n", - " return result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0029e063-0c07-4712-82d9-536ec3579e80", - "metadata": {}, - "outputs": [], - "source": [ - "def translate_brochure(brochure, language):\n", - " system_prompt_for_language = \"You're an expert in \" + language + \". 
Translate the brochure!\"\n", - " response = openai.chat.completions.create(\n", - " model=MODEL,\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": system_prompt_for_language},\n", - " {\"role\": \"user\", \"content\": brochure}\n", - " ],\n", - " )\n", - " result = response.choices[0].message.content\n", " display(Markdown(result))" ] }, @@ -372,28 +337,6 @@ "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510", - "metadata": {}, - "outputs": [], - "source": [ - "brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n", - "# translate_brochure(brochure, \"Spanish\")\n", - "translate_brochure(brochure, \"Swahili\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34e03db6-61d0-4fc5-bf66-4f679b9befde", - "metadata": {}, - "outputs": [], - "source": [ - "create_brochure(\"Wabeh\", \"https://wabeh.com/\")" - ] - }, { "cell_type": "markdown", "id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18", @@ -558,7 +501,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index 7e80d75..f3486fe 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -18,13 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "# imports\n", - "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", - "\n", - "import re, requests, ollama\n", - "from bs4 import BeautifulSoup\n", - "from IPython.display import Markdown, display, update_display\n", - "from openai import OpenAI" + "# imports" ] }, { @@ -47,27 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "# set up environment\n", - "\n", - "headers = {\n", - " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", - "}\n", - "\n", - "class Website:\n", - "\n", - " def __init__(self, url):\n", - " \"\"\"\n", - " Create this Website object from the given url using the BeautifulSoup library\n", - " \"\"\"\n", - " self.url = url\n", - " response = requests.get(url, headers=headers)\n", - " soup = BeautifulSoup(response.content, 'html.parser')\n", - " self.title = soup.title.string if soup.title else \"No title found\"\n", - " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", - " irrelevant.decompose()\n", - " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", - "\n", - "openai = OpenAI()\n" + "# set up environment" ] }, { @@ -79,81 +53,10 @@ "source": [ "# here is the question; type over this to ask something new\n", "\n", - "# question = \"\"\"\n", - "# Please explain what this code does and why:\n", - "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "# \"\"\"\n", - "\n", - "# question = \"\"\"\n", - "# Please explain what this code does and why:\n", - "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "# Popular dev site https://projecteuler.net/\n", - "# \"\"\"\n", - "\n", "question = \"\"\"\n", - "How good at Software Development is Elijah Rwothoromo? \\\n", - "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", - "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. 
\\\n", - "What can we learn from him?\n", - "\"\"\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", - "metadata": {}, - "outputs": [], - "source": [ - "# Process URLs in the question to improve the prompt\n", - "\n", - "# Extract all URLs from the question string using regular expressions\n", - "urls = re.findall(r'https?://[^\\s)]+', question)\n", - "# print(urls)\n", - "\n", - "if len(urls) > 0:\n", - " \n", - " # Fetch the content for each URL using the Website class\n", - " scraped_content = []\n", - " for url in urls:\n", - " print(f\"Scraping: {url}\")\n", - " try:\n", - " site = Website(url)\n", - " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", - " scraped_content.append(content)\n", - " except Exception as e:\n", - " print(f\"Could not scrape {url}: {e}\")\n", - " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", - " \n", - " # Combine all the scraped text into one string\n", - " all_scraped_text = \"\\n\".join(scraped_content)\n", - " \n", - " # Update the question with the scraped content\n", - " updated_question = f\"\"\"\n", - " Based on the following information, please answer the user's original question.\n", - " \n", - " --- TEXT FROM WEBSITES ---\n", - " {all_scraped_text}\n", - " --- END TEXT FROM WEBSITES ---\n", - " \n", - " --- ORIGINAL QUESTION ---\n", - " {question}\n", - " \"\"\"\n", - "else:\n", - " updated_question = question\n", - "\n", - "# print(updated_question)\n", - "\n", - "# system prompt to be more accurate for AI to just analyze the provided text.\n", - "system_prompt = \"You are an expert assistant. \\\n", - "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", - "Provide a short summary, ignoring text that might be navigation-related.\"\n", - "\n", - "# Create the messages list with the newly updated prompt\n", - "messages = [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": updated_question},\n", - "]\n" + "Please explain what this code does and why:\n", + "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "\"\"\"" ] }, { @@ -163,23 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Get gpt-4o-mini to answer, with streaming\n", - "\n", - "def get_gpt_response(question):\n", - " stream = openai.chat.completions.create(\n", - " model=MODEL_GPT,\n", - " messages=messages,\n", - " stream=True\n", - " )\n", - " \n", - " response = \"\"\n", - " display_handle = display(Markdown(\"\"), display_id=True)\n", - " for chunk in stream:\n", - " response += chunk.choices[0].delta.content or ''\n", - " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", - " update_display(Markdown(response), display_id=display_handle.display_id)\n", - "\n", - "get_gpt_response(question)" + "# Get gpt-4o-mini to answer, with streaming" ] }, { @@ -189,26 +76,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Get Llama 3.2 to answer\n", - "\n", - "def get_llama_response(question):\n", - " response = ollama.chat(\n", - " model=MODEL_LLAMA,\n", - " messages=messages,\n", - " stream=False # just get the results, don't stream them\n", - " )\n", - " return response['message']['content']\n", - "\n", - "display(Markdown(get_llama_response(question)))" + "# Get Llama 3.2 to answer" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"fa1e9987-7b6d-49c1-9a81-b1a92aceea72", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -227,7 +96,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.11" } }, "nbformat": 4, From fb4b1bf31be0b092fbbbab794b1669ae28200bef Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:49:41 +0300 Subject: [PATCH 06/14] Remove more redundancies --- week1/day1.ipynb | 2 +- week1/day2 EXERCISE.ipynb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/week1/day1.ipynb b/week1/day1.ipynb index f492110..28c951f 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -607,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day2 EXERCISE.ipynb b/week1/day2 EXERCISE.ipynb index cde9d4a..89a383f 100644 --- a/week1/day2 EXERCISE.ipynb +++ b/week1/day2 EXERCISE.ipynb @@ -118,7 +118,7 @@ "payload = {\n", " \"model\": MODEL,\n", " \"messages\": messages,\n", - " \"stream\": False # just get the results, don't stream them\n", + " \"stream\": False\n", " }" ] }, @@ -308,7 +308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.12" } }, "nbformat": 4, From 3c41597113e4509737b7bcaf384846c876e3d1e4 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 21:53:16 +0300 Subject: [PATCH 07/14] More redundancies removed --- .../01_webpage_summarizer.ipynb | 80 +++---------------- week1/day1.ipynb | 2 +- 2 files changed, 13 insertions(+), 69 deletions(-) diff --git a/week1/community-contributions/01_webpage_summarizer.ipynb b/week1/community-contributions/01_webpage_summarizer.ipynb index 8126396..f8be204 100644 --- a/week1/community-contributions/01_webpage_summarizer.ipynb +++ b/week1/community-contributions/01_webpage_summarizer.ipynb @@ -42,65 +42,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "ebf2fa36", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting selenium\n", - " Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)\n", - "Collecting webdriver-manager\n", - " Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)\n", - "Requirement already satisfied: urllib3~=2.5.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from urllib3[socks]~=2.5.0->selenium) (2.5.0)\n", - "Collecting trio~=0.30.0 (from selenium)\n", - " Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)\n", - "Collecting trio-websocket~=0.12.2 (from selenium)\n", - " Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)\n", - "Requirement already satisfied: certifi>=2025.6.15 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (2025.7.14)\n", - "Requirement already satisfied: typing_extensions~=4.14.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (4.14.1)\n", - "Requirement already satisfied: websocket-client~=1.8.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n", - "Requirement already satisfied: attrs>=23.2.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (25.3.0)\n", - 
"Collecting sortedcontainers (from trio~=0.30.0->selenium)\n", - " Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: idna in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (3.10)\n", - "Collecting outcome (from trio~=0.30.0->selenium)\n", - " Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)\n", - "Requirement already satisfied: sniffio>=1.3.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (1.3.1)\n", - "Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)\n", - " Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n", - "Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]~=2.5.0->selenium)\n", - " Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: requests in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.4)\n", - "Requirement already satisfied: python-dotenv in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.1)\n", - "Requirement already satisfied: packaging in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (25.0)\n", - "Requirement already satisfied: h11<1,>=0.9.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.12.2->selenium) (0.16.0)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.2)\n", - "Downloading selenium-4.34.2-py3-none-any.whl (9.4 MB)\n", - "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0mm \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading trio-0.30.0-py3-none-any.whl (499 kB)\n", - "Downloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)\n", - "Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", - "Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)\n", - "Downloading outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)\n", - "Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n", - "Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n", - "Installing collected packages: sortedcontainers, wsproto, pysocks, outcome, webdriver-manager, trio, trio-websocket, selenium\n", - "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [selenium]━━\u001b[0m \u001b[32m7/8\u001b[0m [selenium]-manager]\n", - "\u001b[1A\u001b[2KSuccessfully installed outcome-1.3.0.post0 pysocks-1.7.1 selenium-4.34.2 sortedcontainers-2.4.0 trio-0.30.0 trio-websocket-0.12.2 webdriver-manager-4.0.2 wsproto-1.2.0\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install selenium webdriver-manager" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "1dcf1d9d-c540-4900-b14e-ad36a28fc822", "metadata": {}, "outputs": [], @@ -140,18 +92,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "8598c299-05ca-492e-b085-6bcc2f7dda0d", 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ API key loaded successfully!\n" - ] - } - ], + "outputs": [], "source": [ "load_dotenv(override=True)\n", "api_key = os.getenv('OPENAI_API_KEY')\n", @@ -165,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8098defb", "metadata": {}, "outputs": [], @@ -184,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c6fe5114", "metadata": {}, "outputs": [], @@ -289,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "02e3a673-a8a1-4101-a441-3816f7ab9e4d", "metadata": {}, "outputs": [], @@ -301,7 +245,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "86bb80f9-9e7c-4825-985f-9b83fe50839f", "metadata": {}, "outputs": [], @@ -315,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "89998b18-77aa-4aaf-a137-f0d078d61f75", "metadata": {}, "outputs": [], @@ -391,7 +335,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -405,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/week1/day1.ipynb b/week1/day1.ipynb index 28c951f..f492110 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -607,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.12" } }, "nbformat": 4, From 32683da2c891f45a231f4412641edc70e8a605fc Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 22:02:16 +0300 Subject: [PATCH 08/14] Clean up ipynb files --- .../rwothoromo/week1/day1.ipynb | 517 ++++++------------ .../rwothoromo/week1/day5.ipynb | 89 --- .../rwothoromo/week1/week1 EXERCISE.ipynb | 9 - 3 files changed, 157 insertions(+), 458 deletions(-) diff --git a/week1/community-contributions/rwothoromo/week1/day1.ipynb b/week1/community-contributions/rwothoromo/week1/day1.ipynb index 414c638..37302de 100644 --- a/week1/community-contributions/rwothoromo/week1/day1.ipynb +++ b/week1/community-contributions/rwothoromo/week1/day1.ipynb @@ -5,107 +5,9 @@ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", "metadata": {}, "source": [ - "# YOUR FIRST LAB\n", - "### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n", + "# How-To\n", "\n", - "## Your first Frontier LLM Project\n", - "\n", - "Let's build a useful LLM solution - in a matter of minutes.\n", - "\n", - "By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n", - "\n", - "Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n", - "\n", - "Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n", - "\n", - "## If you're new to Jupyter Lab\n", - "\n", - "Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. 
Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n", - "\n", - "I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n", - "\n", - "## If you're new to the Command Line\n", - "\n", - "Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n", - "\n", - "## If you'd prefer to work in IDEs\n", - "\n", - "If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n", - "If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n", - "\n", - "## If you'd like to brush up your Python\n", - "\n", - "I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n", - "`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n", - "\n", - "## I am here to help\n", - "\n", - "If you have any problems at all, please do reach out. \n", - "I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n", - "And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n", - "\n", - "## More troubleshooting\n", - "\n", - "Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n", - "\n", - "## For foundational technical knowledge (eg Git, APIs, debugging) \n", - "\n", - "If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n", - "\n", - "This covers Git and GitHub; what they are, the difference, and how to use them: \n", - "https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n", - "\n", - "This covers technical foundations: \n", - "ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n", - "https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n", - "\n", - "This covers Python for beginners, and making sure that a `NameError` never trips you up: \n", - "https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n", - "\n", - "This covers the essential techniques for figuring out errors: \n", - "https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n", - "\n", - "And you'll find other useful guides in the same folder in GitHub. 
Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n", - "\n", - "## If this is old hat!\n", - "\n", - "If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Please read - important note

\n", - " The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, after watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n", - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

This code is a live resource - keep an eye out for my emails

\n", - " I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.

\n", - " I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n", - "
\n", - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Business value of these exercises

\n", - " A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.\n", - "
" + "Press `Shift` + `Return` to run a Cell.\n" ] }, { @@ -117,73 +19,24 @@ "source": [ "# imports\n", "\n", - "import os\n", - "import requests\n", + "import os, requests, time\n", "from dotenv import load_dotenv\n", "from bs4 import BeautifulSoup\n", "from IPython.display import Markdown, display\n", "from openai import OpenAI\n", "\n", - "# If you get an error running this cell, then please head over to the troubleshooting notebook!" - ] - }, - { - "cell_type": "markdown", - "id": "6900b2a8-6384-4316-8aaa-5e519fca4254", - "metadata": {}, - "source": [ - "# Connecting to OpenAI (or Ollama)\n", - "\n", - "The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n", - "\n", - "If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n", - "\n", - "## Troubleshooting if you have problems:\n", - "\n", - "Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n", - "\n", - "If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n", - "\n", - "Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n", - "\n", - "Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b87cadb-d513-4303-baee-a37b6f938e4d", - "metadata": {}, - "outputs": [], - "source": [ "# Load environment variables in a file called .env\n", - "\n", "load_dotenv(override=True)\n", "api_key = os.getenv('OPENAI_API_KEY')\n", "\n", "# Check the key\n", - "\n", "if not api_key:\n", - " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", - "elif not api_key.startswith(\"sk-proj-\"):\n", - " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", - "elif api_key.strip() != api_key:\n", - " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + " print(\"No API key was found\")\n", "else:\n", - " print(\"API key found and looks good so far!\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3", - "metadata": {}, - "outputs": [], - "source": [ - "openai = OpenAI()\n", + " print(\"API key found and looks good so far!\")\n", "\n", - "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n", - "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions" + "# Instantiate an OpenAI object\n", + "openai = OpenAI()" ] }, { @@ -191,7 +44,7 @@ "id": "442fc84b-0815-4f40-99ab-d9a5da6bda91", "metadata": {}, "source": [ - "# Let's make a quick call to a Frontier model to get started, as a preview!" 
+ "# Make a test call to a Frontier model (Open AI) to get started:" ] }, { @@ -201,9 +54,7 @@ "metadata": {}, "outputs": [], "source": [ - "# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n", - "\n", - "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n", + "message = \"Hello, GPT! Holla back to this space probe!\"\n", "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n", "print(response.choices[0].message.content)" ] @@ -213,7 +64,7 @@ "id": "2aa190e5-cb31-456a-96cc-db109919cd78", "metadata": {}, "source": [ - "## OK onwards with our first project" + "## Summarization project" ] }, { @@ -223,14 +74,14 @@ "metadata": {}, "outputs": [], "source": [ - "# A class to represent a Webpage\n", - "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n", - "\n", - "# Some websites need you to use proper headers when fetching them:\n", + "# Some websites need proper headers when fetching them:\n", "headers = {\n", " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", "}\n", "\n", + "\"\"\"\n", + "A class to represent a Webpage\n", + "\"\"\"\n", "class Website:\n", "\n", " def __init__(self, url):\n", @@ -253,29 +104,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Let's try one out. Change the website and add print statements to follow along.\n", - "\n", - "ed = Website(\"https://edwarddonner.com\")\n", - "print(ed.title)\n", - "print(ed.text)" - ] - }, - { - "cell_type": "markdown", - "id": "6a478a0c-2c53-48ff-869c-4d08199931e1", - "metadata": {}, - "source": [ - "## Types of prompts\n", - "\n", - "You may know this already - but if not, you will get very familiar with it!\n", - "\n", - "Models like GPT4o have been trained to receive instructions in a particular way.\n", - "\n", - "They expect to receive:\n", - "\n", - "**A system prompt** that tells them what task they are performing and what tone they should use\n", - "\n", - "**A user prompt** -- the conversation starter that they should reply to" + "# Summarize website content\n", + "website = Website(\"https://rwothoromo.wordpress.com/\")\n", + "# print(eli.title, \"\\n\", eli.text)" ] }, { @@ -285,10 +116,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n", + "# A system prompt tells a model like GPT4o what task they are performing and what tone they should use\n", + "# A user prompt is the conversation starter that they should reply to\n", "\n", - "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n", - "and provides a short summary, ignoring text that might be navigation related. \\\n", + "system_prompt = \"You are an assistant that analyzes the contents of a given website, \\\n", + "and returns a brief summary, ignoring text that might be navigation-related. 
\\\n", "Respond in markdown.\"" ] }, @@ -317,26 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(user_prompt_for(ed))" - ] - }, - { - "cell_type": "markdown", - "id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc", - "metadata": {}, - "source": [ - "## Messages\n", - "\n", - "The API from OpenAI expects to receive messages in a particular structure.\n", - "Many of the other APIs share this structure:\n", - "\n", - "```python\n", - "[\n", - " {\"role\": \"system\", \"content\": \"system message goes here\"},\n", - " {\"role\": \"user\", \"content\": \"user message goes here\"}\n", - "]\n", - "```\n", - "To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)" + "print(user_prompt_for(website))" ] }, { @@ -346,33 +159,15 @@ "metadata": {}, "outputs": [], "source": [ + "# The API from OpenAI expects to receive messages in a particular structure. Many of the other APIs share this structure:\n", "messages = [\n", - " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n", - " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21ed95c5-7001-47de-a36d-1d6673b403ce", - "metadata": {}, - "outputs": [], - "source": [ - "# To give you a preview -- calling OpenAI with system and user messages:\n", - "\n", + " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"}, # system message\n", + " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}, # user message\n", + "]\n", "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n", "print(response.choices[0].message.content)" ] }, - { - "cell_type": "markdown", - "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47", - "metadata": {}, - "source": [ - "## And now let's build useful messages for GPT-4o-mini, using a function" - ] - }, { "cell_type": "code", "execution_count": null, @@ -380,33 +175,15 @@ "metadata": {}, "outputs": [], "source": [ - "# See how this function creates exactly the format above\n", + "# To build useful messages for GPT-4o-mini\n", "\n", "def messages_for(website):\n", " return [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n", - " ]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36478464-39ee-485c-9f3f-6a4e458dbc9c", - "metadata": {}, - "outputs": [], - "source": [ - "# Try this out, and then try for a few more websites\n", + " ]\n", "\n", - "messages_for(ed)" - ] - }, - { - "cell_type": "markdown", - "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0", - "metadata": {}, - "source": [ - "## Time to bring it together - the API for OpenAI is very simple!" + "messages_for(website)" ] }, { @@ -416,10 +193,12 @@ "metadata": {}, "outputs": [], "source": [ - "# And now: call the OpenAI API. 
You will get very familiar with this!\n", + "# Call the OpenAI API.\n", "\n", - "def summarize(url):\n", - " website = Website(url)\n", + "url = \"https://rwothoromo.wordpress.com/\"\n", + "website = Website(url)\n", + "\n", + "def summarize(website):\n", " response = openai.chat.completions.create(\n", " model = \"gpt-4o-mini\",\n", " messages = messages_for(website)\n", @@ -434,7 +213,7 @@ "metadata": {}, "outputs": [], "source": [ - "summarize(\"https://edwarddonner.com\")" + "summarize(website)" ] }, { @@ -446,8 +225,8 @@ "source": [ "# A function to display this nicely in the Jupyter output, using markdown\n", "\n", - "def display_summary(url):\n", - " summary = summarize(url)\n", + "summary = summarize(website)\n", + "def display_summary(summary):\n", " display(Markdown(summary))" ] }, @@ -458,43 +237,10 @@ "metadata": {}, "outputs": [], "source": [ - "display_summary(\"https://edwarddonner.com\")" - ] - }, - { - "cell_type": "markdown", - "id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624", - "metadata": {}, - "source": [ - "# Let's try more websites\n", - "\n", - "Note that this will only work on websites that can be scraped using this simplistic approach.\n", - "\n", - "Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n", - "\n", - "Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n", - "\n", - "But many websites will work just fine!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45d83403-a24c-44b5-84ac-961449b4008f", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://cnn.com\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75e9fd40-b354-4341-991e-863ef2e59db7", - "metadata": {}, - "outputs": [], - "source": [ - "display_summary(\"https://anthropic.com\")" + "display_summary(summary)\n", + "# display_summary(summarize(Website(\"https://edwarddonner.com\")))\n", + "# display_summary(summarize(Website(\"https://cnn.com\")))\n", + "# display_summary(summarize(Website(\"https://anthropic.com\")))" ] }, { @@ -504,49 +250,127 @@ "metadata": {}, "outputs": [], "source": [ - "display_summary(\"https://rwothoromo.wordpress.com/\")" + "# Websites protected with CloudFront (and similar) or with JavaScript need a Selenium or Playwright implementation. 
They return 403\n", + "\n", + "# display_summary(summarize(Website(\"https://openai.com\")))" ] }, { "cell_type": "code", "execution_count": null, - "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", + "id": "139ad985", "metadata": {}, "outputs": [], "source": [ - "display_summary(\"https://openai.com\")" + "# To generate the above summary, use selenium\n", + "\n", + "from selenium import webdriver\n", + "from selenium.webdriver.chrome.service import Service\n", + "from selenium.webdriver.common.by import By\n", + "from selenium.webdriver.support.ui import WebDriverWait\n", + "from selenium.webdriver.support import expected_conditions as EC\n", + "\n", + "class WebsiteSelenium:\n", + " def __init__(self, url):\n", + " self.url = url\n", + " self.title = \"No title found\"\n", + " self.text = \"\"\n", + "\n", + " # Configure Chrome options (headless mode is recommended for server environments)\n", + " chrome_options = webdriver.ChromeOptions()\n", + " chrome_options.add_argument(\"--headless\") # Run Chrome in headless mode (without a UI)\n", + " chrome_options.add_argument(\"--no-sandbox\") # Required for running as root in some environments\n", + " chrome_options.add_argument(\"--disable-dev-shm-usage\") # Overcomes limited resource problems\n", + "\n", + " # Path to your WebDriver executable (e.g., chromedriver)\n", + " # Make sure to replace this with the actual path to your chromedriver\n", + " # You might need to download it from: https://chromedriver.chromium.org/downloads and place it in a drivers dir\n", + " service = Service('./drivers/chromedriver-mac-x64/chromedriver')\n", + "\n", + " driver = None\n", + " try:\n", + " driver = webdriver.Chrome(service=service, options=chrome_options)\n", + " driver.get(url)\n", + "\n", + " # Wait for the page to load and dynamic content to render\n", + " # You might need to adjust the wait condition based on the website\n", + " WebDriverWait(driver, 10).until(\n", + " EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n", + " )\n", + " time.sleep(3) # Give more time for JavaScript to execute\n", + "\n", + " # Get the page source after dynamic content has loaded\n", + " soup = BeautifulSoup(driver.page_source, 'html.parser')\n", + "\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + "\n", + " except Exception as e:\n", + " print(f\"Error accessing {url} with Selenium: {e}\")\n", + " finally:\n", + " if driver:\n", + " driver.quit() # Always close the browser\n", + "\n", + "display_summary(summarize(WebsiteSelenium(\"https://openai.com\")))" ] }, { - "cell_type": "markdown", - "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", + "cell_type": "code", + "execution_count": null, + "id": "130d4572", "metadata": {}, + "outputs": [], "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Business applications

\n", - " In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n", + "import asyncio\n", + "from playwright.async_api import async_playwright\n", + "import nest_asyncio\n", "\n", - "More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.\n", - "
\n", + "# Apply nest_asyncio to allow asyncio.run in Jupyter\n", + "nest_asyncio.apply()\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Before you continue - now try yourself

\n", - " Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.\n", - "
" + "class WebsitePlaywright:\n", + " def __init__(self, url):\n", + " self.url = url\n", + " self.title = \"No title found\"\n", + " self.text = \"\"\n", + " asyncio.run(self._fetch_content())\n", + "\n", + " async def _fetch_content(self):\n", + " async with async_playwright() as p:\n", + " browser = None\n", + " try:\n", + " browser = await p.chromium.launch(headless=True)\n", + " page = await browser.new_page()\n", + "\n", + " # Increase timeout for navigation and other operations\n", + " await page.goto(self.url, timeout=60000) # Wait up to 60 seconds for navigation\n", + " print(f\"Accessing {self.url} with Playwright - goto()\")\n", + "\n", + " # You might need to adjust or add more specific waits\n", + " await page.wait_for_load_state('domcontentloaded', timeout=60000) # Wait for basic HTML\n", + " # await page.wait_for_load_state('networkidle', timeout=60000) # Wait for network activity to settle\n", + " await page.wait_for_selector('div.duration-short', timeout=60000) # instead of networkidle\n", + " await page.wait_for_selector('body', timeout=60000) # Wait for the body to be present\n", + " await asyncio.sleep(5) # Give a bit more time for final rendering\n", + "\n", + " content = await page.content()\n", + " soup = BeautifulSoup(content, 'html.parser')\n", + "\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " print(f\"Accessed {self.url} with Playwright\")\n", + "\n", + " except Exception as e:\n", + " print(f\"Error accessing {self.url} with Playwright: {e}\")\n", + " finally:\n", + " if browser:\n", + " await browser.close()\n", + "\n", + "display_summary(summarize(WebsitePlaywright(\"https://openai.com/\")))" ] }, { @@ -558,11 +382,10 @@ "source": [ "# Step 1: Create your prompts\n", "\n", - "system_prompt = \"You are a professional assistant\"\n", + "system_prompt = \"You are a professional assistant. Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\"\n", "user_prompt = \"\"\"\n", - "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", "\n", - "Dear Dev Contact,\n", + "Dear Email Contact,\n", "\n", "I hope this message finds you well.\n", "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", @@ -575,19 +398,19 @@ "\n", "I hope you are doing well.\n", "\n", - "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", + "The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", "\n", - "Hello Dev Contact \n", + "Hello Email Contact,\n", "Thank you for your feedback.\n", "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", "\n", - "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", + "Email Contact: They said \"It's late. Interviews were on Monday\"\n", "\n", - "Hello Dev Contact\n", + "Hello Email Contact\n", "\n", "Thanks for the update. 
When you hear of any opportunity please let me know.\n", "\n", - "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", + "Email Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", "\"\"\"\n", "\n", "# Step 2: Make the messages list\n", @@ -595,7 +418,7 @@ "messages = [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt},\n", - "] # fill this in\n", + "]\n", "\n", "# Step 3: Call OpenAI\n", "\n", @@ -609,35 +432,10 @@ "print(response.choices[0].message.content)" ] }, - { - "cell_type": "markdown", - "id": "36ed9f14-b349-40e9-a42c-b367e77f8bda", - "metadata": {}, - "source": [ - "## An extra exercise for those who enjoy web scraping\n", - "\n", - "You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)" - ] - }, - { - "cell_type": "markdown", - "id": "eeab24dc-5f90-4570-b542-b0585aca3eb6", - "metadata": {}, - "source": [ - "# Sharing your code\n", - "\n", - "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n", - "\n", - "If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. 
As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n", - "\n", - "Here are good instructions courtesy of an AI friend: \n", - "https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", + "id": "4b583226-9b13-4990-863a-86517a5ccfec", "metadata": {}, "outputs": [], "source": [ @@ -648,7 +446,6 @@ "# HEADERS = {\"Content-Type\": \"application/json\"}\n", "MODEL = \"llama3.2\"\n", "\n", - "\n", "def summarize_with_local_model(url):\n", " website = Website(url)\n", " messages = messages_for(website)\n", @@ -665,7 +462,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", + "id": "b8e3fe00-d98b-4c0c-b32b-3f3bd17a1546", "metadata": {}, "outputs": [], "source": [] diff --git a/week1/community-contributions/rwothoromo/week1/day5.ipynb b/week1/community-contributions/rwothoromo/week1/day5.ipynb index 39142ef..4f831bd 100644 --- a/week1/community-contributions/rwothoromo/week1/day5.ipynb +++ b/week1/community-contributions/rwothoromo/week1/day5.ipynb @@ -451,95 +451,6 @@ "\n", "stream_brochure(\"HuggingFace\", \"https://huggingface.co\")" ] - }, - { - "cell_type": "markdown", - "id": "a27bf9e0-665f-4645-b66b-9725e2a959b5", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Business applications

\n", - " In this exercise we extended the Day 1 code to make multiple LLM calls, and generate a document.\n", - "\n", - "This is perhaps the first example of Agentic AI design patterns, as we combined multiple calls to LLMs. This will feature more in Week 2, and then we will return to Agentic AI in a big way in Week 8 when we build a fully autonomous Agent solution.\n", - "\n", - "Generating content in this way is one of the very most common Use Cases. As with summarization, this can be applied to any business vertical. Write marketing content, generate a product tutorial from a spec, create personalized email content, and so much more. Explore how you can apply content generation to your business, and try making yourself a proof-of-concept prototype. See what other students have done in the community-contributions folder -- so many valuable projects -- it's wild!\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "14b2454b-8ef8-4b5c-b928-053a15e0d553", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Before you move to Week 2 (which is tons of fun)

\n", - " Please see the week1 EXERCISE notebook for your challenge for the end of week 1. This will give you some essential practice working with Frontier APIs, and prepare you well for Week 2.\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "17b64f0f-7d33-4493-985a-033d06e8db08", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

A reminder on 3 useful resources

\n", - " 1. The resources for the course are available here.
\n", - " 2. I'm on LinkedIn here and I love connecting with people taking the course!
\n", - " 3. I'm trying out X/Twitter and I'm at @edwarddonner and hoping people will teach me how it's done.. \n", - "
\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "6f48e42e-fa7a-495f-a5d4-26bfc24d60b6", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Finally! I have a special request for you

\n", - " \n", - " My editor tells me that it makes a MASSIVE difference when students rate this course on Udemy - it's one of the main ways that Udemy decides whether to show it to others. If you're able to take a minute to rate this, I'd be so very grateful! And regardless - always please reach out to me at ed@edwarddonner.com if I can help at any point.\n", - " \n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8d3e1a1-ba54-4907-97c5-30f89a24775b", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb index 7e80d75..fc975e5 100644 --- a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb +++ b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb @@ -19,7 +19,6 @@ "outputs": [], "source": [ "# imports\n", - "# Important Pull request ref: https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293\n", "\n", "import re, requests, ollama\n", "from bs4 import BeautifulSoup\n", @@ -201,14 +200,6 @@ "\n", "display(Markdown(get_llama_response(question)))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 7c0843eea61dbcd64e0a92a247f024932054b474 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 22:07:45 +0300 Subject: [PATCH 09/14] Remove redundant day 2. its content is in day 1 --- .../rwothoromo/week1/day1.ipynb | 8 - .../rwothoromo/week1/day2 EXERCISE.ipynb | 316 ------------------ 2 files changed, 324 deletions(-) delete mode 100644 week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb diff --git a/week1/community-contributions/rwothoromo/week1/day1.ipynb b/week1/community-contributions/rwothoromo/week1/day1.ipynb index 37302de..e67c86d 100644 --- a/week1/community-contributions/rwothoromo/week1/day1.ipynb +++ b/week1/community-contributions/rwothoromo/week1/day1.ipynb @@ -458,14 +458,6 @@ "\n", "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8e3fe00-d98b-4c0c-b32b-3f3bd17a1546", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb deleted file mode 100644 index cde9d4a..0000000 --- a/week1/community-contributions/rwothoromo/week1/day2 EXERCISE.ipynb +++ /dev/null @@ -1,316 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", - "metadata": {}, - "source": [ - "# Welcome to your first assignment!\n", - "\n", - "Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)" - ] - }, - { - "cell_type": "markdown", - "id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9", - "metadata": {}, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - "

Just before we get to the assignment --

\n", - " I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.
\n", - " https://edwarddonner.com/2024/11/13/llm-engineering-resources/
\n", - " Please keep this bookmarked, and I'll continue to add more useful links there over time.\n", - "
\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458", - "metadata": {}, - "source": [ - "# HOMEWORK EXERCISE ASSIGNMENT\n", - "\n", - "Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n", - "\n", - "You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n", - "\n", - "**Benefits:**\n", - "1. No API charges - open-source\n", - "2. Data doesn't leave your box\n", - "\n", - "**Disadvantages:**\n", - "1. Significantly less power than Frontier Model\n", - "\n", - "## Recap on installation of Ollama\n", - "\n", - "Simply visit [ollama.com](https://ollama.com) and install!\n", - "\n", - "Once complete, the ollama server should already be running locally. \n", - "If you visit: \n", - "[http://localhost:11434/](http://localhost:11434/)\n", - "\n", - "You should see the message `Ollama is running`. \n", - "\n", - "If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n", - "And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n", - "Then try [http://localhost:11434/](http://localhost:11434/) again.\n", - "\n", - "If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd", - "metadata": {}, - "outputs": [], - "source": [ - "# imports\n", - "\n", - "import requests\n", - "from bs4 import BeautifulSoup\n", - "from IPython.display import Markdown, display" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29ddd15d-a3c5-4f4e-a678-873f56162724", - "metadata": {}, - "outputs": [], - "source": [ - "# Constants\n", - "\n", - "OLLAMA_API = \"http://localhost:11434/api/chat\"\n", - "HEADERS = {\"Content-Type\": \"application/json\"}\n", - "MODEL = \"llama3.2\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dac0a679-599c-441f-9bf2-ddc73d35b940", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a messages list using the same format that we used for OpenAI\n", - "\n", - "messages = [\n", - " {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7bb9c624-14f0-4945-a719-8ddb64f66f47", - "metadata": {}, - "outputs": [], - "source": [ - "payload = {\n", - " \"model\": MODEL,\n", - " \"messages\": messages,\n", - " \"stream\": False # just get the results, don't stream them\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "479ff514-e8bd-4985-a572-2ea28bb4fa40", - "metadata": {}, - "outputs": [], - "source": [ - "# Let's just make sure the model is loaded\n", - "\n", - "!ollama pull llama3.2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42b9f644-522d-4e05-a691-56e7658c0ea9", - "metadata": {}, - "outputs": [], - "source": [ - "# If this doesn't work for any reason, try the 2 versions in the following cells\n", - "# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n", - "# And if none of that works - contact me!\n", - "\n", - "response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n", - 
"print(response.json()['message']['content'])" - ] - }, - { - "cell_type": "markdown", - "id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe", - "metadata": {}, - "source": [ - "# Introducing the ollama package\n", - "\n", - "And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n", - "\n", - "Under the hood, it's making the same call as above to the ollama server running at localhost:11434" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7745b9c4-57dc-4867-9180-61fa5db55eb8", - "metadata": {}, - "outputs": [], - "source": [ - "import ollama\n", - "\n", - "response = ollama.chat(model=MODEL, messages=messages)\n", - "print(response['message']['content'])" - ] - }, - { - "cell_type": "markdown", - "id": "a4704e10-f5fb-4c15-a935-f046c06fb13d", - "metadata": {}, - "source": [ - "## Alternative approach - using OpenAI python library to connect to Ollama" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23057e00-b6fc-4678-93a9-6b31cb704bff", - "metadata": {}, - "outputs": [], - "source": [ - "# There's actually an alternative approach that some people might prefer\n", - "# You can use the OpenAI client python library to call Ollama:\n", - "\n", - "from openai import OpenAI\n", - "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", - "\n", - "response = ollama_via_openai.chat.completions.create(\n", - " model=MODEL,\n", - " messages=messages\n", - ")\n", - "\n", - "print(response.choices[0].message.content)" - ] - }, - { - "cell_type": "markdown", - "id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44", - "metadata": {}, - "source": [ - "## Are you confused about why that works?\n", - "\n", - "It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n", - "\n", - "Here's the scoop:\n", - "\n", - "The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n", - "\n", - "When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n", - "\n", - "Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n", - "\n", - "OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n", - "\n", - "So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n", - "And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n", - "\n", - "And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n", - "\n", - "That's it!\n", - "\n", - "So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n", - "Then this will make the same endpoint calls, but to Ollama instead of OpenAI." - ] - }, - { - "cell_type": "markdown", - "id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90", - "metadata": {}, - "source": [ - "## Also trying the amazing reasoning model DeepSeek\n", - "\n", - "Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. 
\n", - "This is actually a 1.5B variant of Qwen that has been fine-tuned using synethic data generated by Deepseek R1.\n", - "\n", - "Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d", - "metadata": {}, - "outputs": [], - "source": [ - "!ollama pull deepseek-r1:1.5b" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d3d554b-e00d-4c08-9300-45e073950a76", - "metadata": {}, - "outputs": [], - "source": [ - "# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside tags, followed by some decent definitions\n", - "\n", - "response = ollama_via_openai.chat.completions.create(\n", - " model=\"deepseek-r1:1.5b\",\n", - " messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n", - ")\n", - "\n", - "print(response.choices[0].message.content)" - ] - }, - { - "cell_type": "markdown", - "id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898", - "metadata": {}, - "source": [ - "# NOW the exercise for you\n", - "\n", - "Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 8f003da028670203d45e3721ae9c35699ba8663a Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 22:16:09 +0300 Subject: [PATCH 10/14] Update exercise prompts --- .../rwothoromo/week1/week1 EXERCISE.ipynb | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb index fc975e5..673c1b6 100644 --- a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb +++ b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "c1070317-3ed9-4659-abe3-828943230e03", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", "metadata": {}, "outputs": [], @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", "metadata": {}, "outputs": [], @@ -89,6 +89,12 @@ "# Popular dev site https://projecteuler.net/\n", "# \"\"\"\n", "\n", + "# question = \"\"\"\n", + "# Who is Blessed Goodteam (https://www.linkedin.com/in/blessed-goodteam-49b3ab30a)? \\\n", + "# How relevant is her work at Paint and Sip Uganda (https://paintandsipuganda.com/). 
\\\n", + "# What can we learn from her?\n", + "# \"\"\"\n", + "\n", "question = \"\"\"\n", "How good at Software Development is Elijah Rwothoromo? \\\n", "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", @@ -200,6 +206,14 @@ "\n", "display(Markdown(get_llama_response(question)))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "157d5bb3-bed7-4fbd-9a5d-f2a14aaac869", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 1220bd28111765cb15ce6c3961fb1717d41492a6 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 22:21:42 +0300 Subject: [PATCH 11/14] refine files --- week1/community-contributions/rwothoromo/week1/day1.ipynb | 2 +- .../rwothoromo/week1/week1 EXERCISE.ipynb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/week1/community-contributions/rwothoromo/week1/day1.ipynb b/week1/community-contributions/rwothoromo/week1/day1.ipynb index e67c86d..d207af3 100644 --- a/week1/community-contributions/rwothoromo/week1/day1.ipynb +++ b/week1/community-contributions/rwothoromo/week1/day1.ipynb @@ -5,7 +5,7 @@ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", "metadata": {}, "source": [ - "# How-To\n", + "# How to run a cell\n", "\n", "Press `Shift` + `Return` to run a Cell.\n" ] diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb index 673c1b6..9f230dd 100644 --- a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb +++ b/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "c1070317-3ed9-4659-abe3-828943230e03", "metadata": {}, "outputs": [], @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", "metadata": {}, "outputs": [], @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", "metadata": {}, "outputs": [], From ad0d418abbe1941ef50e9eb7697400920e2f36fc Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 22:24:22 +0300 Subject: [PATCH 12/14] Move directory --- week1/community-contributions/rwothoromo/{week1 => }/day1.ipynb | 0 week1/community-contributions/rwothoromo/{week1 => }/day5.ipynb | 0 .../rwothoromo/{week1 => }/week1 EXERCISE.ipynb | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename week1/community-contributions/rwothoromo/{week1 => }/day1.ipynb (100%) rename week1/community-contributions/rwothoromo/{week1 => }/day5.ipynb (100%) rename week1/community-contributions/rwothoromo/{week1 => }/week1 EXERCISE.ipynb (100%) diff --git a/week1/community-contributions/rwothoromo/week1/day1.ipynb b/week1/community-contributions/rwothoromo/day1.ipynb similarity index 100% rename from week1/community-contributions/rwothoromo/week1/day1.ipynb rename to week1/community-contributions/rwothoromo/day1.ipynb diff --git a/week1/community-contributions/rwothoromo/week1/day5.ipynb b/week1/community-contributions/rwothoromo/day5.ipynb similarity index 100% rename from week1/community-contributions/rwothoromo/week1/day5.ipynb rename to week1/community-contributions/rwothoromo/day5.ipynb diff --git a/week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb similarity index 100% rename from 
week1/community-contributions/rwothoromo/week1/week1 EXERCISE.ipynb rename to week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb From 6fb0e87d120f3bc2ef3f1b7bfced71c3110759c5 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Thu, 7 Aug 2025 20:33:30 +0300 Subject: [PATCH 13/14] week 1 exercise clean up --- .../rwothoromo/week1 EXERCISE.ipynb | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb index 9f230dd..53723b2 100644 --- a/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb +++ b/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb @@ -189,10 +189,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "**Summary:**\n", + "Elijah Rwothoromo is a software developer with expertise in various areas, as evident from his online presence. While there's limited information available about his specific skills and achievements, here are some insights that can be gleaned:\n", + "\n", + "* **Web Development:** His WordPress site showcases his proficiency in web development, which suggests he has experience with HTML, CSS, JavaScript, and possibly other frameworks or libraries.\n", + "* **Programming Languages:** The use of Flask (a Python-based microframework) on his WordPress site indicates familiarity with Python programming. However, his LinkedIn profile doesn't provide specific information about the languages he uses.\n", + "* **Software Development Frameworks:** As mentioned in his blog post, Rwothoromo has experience with both Django and Flask, two popular Python web development frameworks. This suggests he's well-versed in software development principles and has a solid understanding of framework-specific concepts.\n", + "\n", + "**Additional Insights:**\n", + "\n", + "* Elijah Rwothoromo seems to be an active contributor to the tech community, sharing his experiences through blog posts and participating in online discussions.\n", + "* His LinkedIn profile is incomplete, which might indicate that he hasn't fully optimized it for professional networking or is still building his online presence.\n", + "* While there's limited information about Rwothoromo's skills, it's clear that he has a strong foundation in web development, software development frameworks, and possibly other areas like project management (as indicated by the inclusion of tools like Visual Studio Code)." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Get Llama 3.2 to answer\n", "\n", From e09bdbb46938ecdb8faff3906338228e16124218 Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Thu, 7 Aug 2025 20:36:11 +0300 Subject: [PATCH 14/14] week 1 exercise clean up --- .../rwothoromo/week1 EXERCISE.ipynb | 28 ++----------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb b/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb index 53723b2..9f230dd 100644 --- a/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb +++ b/week1/community-contributions/rwothoromo/week1 EXERCISE.ipynb @@ -189,34 +189,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "**Summary:**\n", - "Elijah Rwothoromo is a software developer with expertise in various areas, as evident from his online presence. While there's limited information available about his specific skills and achievements, here are some insights that can be gleaned:\n", - "\n", - "* **Web Development:** His WordPress site showcases his proficiency in web development, which suggests he has experience with HTML, CSS, JavaScript, and possibly other frameworks or libraries.\n", - "* **Programming Languages:** The use of Flask (a Python-based microframework) on his WordPress site indicates familiarity with Python programming. However, his LinkedIn profile doesn't provide specific information about the languages he uses.\n", - "* **Software Development Frameworks:** As mentioned in his blog post, Rwothoromo has experience with both Django and Flask, two popular Python web development frameworks. This suggests he's well-versed in software development principles and has a solid understanding of framework-specific concepts.\n", - "\n", - "**Additional Insights:**\n", - "\n", - "* Elijah Rwothoromo seems to be an active contributor to the tech community, sharing his experiences through blog posts and participating in online discussions.\n", - "* His LinkedIn profile is incomplete, which might indicate that he hasn't fully optimized it for professional networking or is still building his online presence.\n", - "* While there's limited information about Rwothoromo's skills, it's clear that he has a strong foundation in web development, software development frameworks, and possibly other areas like project management (as indicated by the inclusion of tools like Visual Studio Code)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Get Llama 3.2 to answer\n", "\n",