From 9da9692a9b7b8db12a66913e4355a20fe18c565a Mon Sep 17 00:00:00 2001 From: Elijah Rwothoromo Date: Tue, 5 Aug 2025 20:42:50 +0300 Subject: [PATCH] Week 1 exercise --- week1/Guide to Jupyter.ipynb | 155 +++++++++++--- .../01_webpage_summarizer.ipynb | 80 ++++++-- week1/day1.ipynb | 94 ++++++++- week1/day2 EXERCISE.ipynb | 4 +- week1/day5.ipynb | 71 ++++++- week1/troubleshooting.ipynb | 189 ++++++++++++++++-- week1/week1 EXERCISE.ipynb | 135 ++++++++++++- 7 files changed, 649 insertions(+), 79 deletions(-) diff --git a/week1/Guide to Jupyter.ipynb b/week1/Guide to Jupyter.ipynb index ebcc9f0..0de6a9f 100644 --- a/week1/Guide to Jupyter.ipynb +++ b/week1/Guide to Jupyter.ipynb @@ -32,10 +32,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "33d37cd8-55c9-4e03-868c-34aa9cab2c80", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Click anywhere in this cell and press Shift + Return\n", "\n", @@ -54,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "585eb9c1-85ee-4c27-8dc2-b4d8d022eda0", "metadata": {}, "outputs": [], @@ -66,10 +77,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "07792faa-761d-46cb-b9b7-2bbf70bb1628", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'bananas'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The result of the last statement is shown after you run it\n", "\n", @@ -78,10 +100,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a067d2b1-53d5-4aeb-8a3c-574d39ff654a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is bananas\n" + ] + } + ], "source": [ "# Use the variable\n", "\n", @@ -90,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "4c5a4e60-b7f4-4953-9e80-6d84ba4664ad", "metadata": {}, "outputs": [], @@ -116,10 +146,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "8e5ec81d-7c5b-4025-bd2e-468d67b581b6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is anything but bananas\n" + ] + } + ], "source": [ "# Then run this cell twice, and see if you understand what's going on\n", "\n", @@ -144,10 +182,18 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", + "execution_count": 7, + "id": "ce258424-40c3-49a7-9462-e6fa25014b03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My favorite fruit is apples\n" + ] + } + ], "source": [ "print(f\"My favorite fruit is {favorite_fruit}\")" ] @@ -165,10 +211,12 @@ { "cell_type": "code", "execution_count": null, - "id": "ce258424-40c3-49a7-9462-e6fa25014b03", + "id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "print(f\"My favorite fruit is {favorite_fruit} - ok\")" + ] }, { "cell_type": "markdown", @@ -221,10 +269,25 @@ "Click in the cell and press the Bin icon if you want to remove it." ] }, + { + "cell_type": "markdown", + "id": "b3b2d1ff-5d2c-47a9-9c1b-90a0cfb89dd9", + "metadata": {}, + "source": [ + "# This is a heading\n", + "## This is a sub-head\n", + "### And a sub-sub-head\n", + "\n", + "I like Jupyter Lab because it's\n", + "- Easy\n", + "- Flexible\n", + "- Satisfying" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "e1586320-c90f-4f22-8b39-df6865484950", + "id": "a365b651-3a34-40ed-8def-df1e6a484b43", "metadata": {}, "outputs": [], "source": [] @@ -245,10 +308,21 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "82042fc5-a907-4381-a4b8-eb9386df19cd", + "execution_count": 1, + "id": "b1b303d9-ce47-4cee-85e9-6416abca7d21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Guide to Jupyter.ipynb day2 EXERCISE.ipynb troubleshooting.ipynb\n", + "Intermediate Python.ipynb day5.ipynb week1 EXERCISE.ipynb\n", + "\u001b[34mcommunity-contributions\u001b[m\u001b[m diagnostics.py\n", + "day1.ipynb \u001b[34msolutions\u001b[m\u001b[m\n" + ] + } + ], "source": [ "# list the current directory\n", "\n", @@ -258,13 +332,13 @@ { "cell_type": "code", "execution_count": null, - "id": "4fc3e3da-8a55-40cc-9706-48bf12a0e20e", + "id": "18685382-3768-4e00-817b-cc69dd1fb531", "metadata": {}, "outputs": [], "source": [ - "# ping cnn.com - press the stop button in the toolbar when you're bored\n", + "# ping a website\n", "\n", - "!ping cnn.com" + "!ping google.com" ] }, { @@ -295,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2646a4e5-3c23-4aee-a34d-d623815187d2", "metadata": {}, "outputs": [], @@ -313,10 +387,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "6e96be3d-fa82-42a3-a8aa-b81dd20563a5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████| 1000/1000 [00:11<00:00, 86.00it/s]\n" + ] + } + ], "source": [ "# And now, with a nice little progress bar:\n", "\n", @@ -331,10 +413,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "63c788dd-4618-4bb4-a5ce-204411a38ade", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "# This is a big heading!\n", + "\n", + "- And this is a bullet-point\n", + "- So is this\n", + "- Me, too!" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# On a different topic, here's a useful way to print output in markdown\n", "\n", @@ -372,7 +471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/week1/community-contributions/01_webpage_summarizer.ipynb b/week1/community-contributions/01_webpage_summarizer.ipynb index f8be204..8126396 100644 --- a/week1/community-contributions/01_webpage_summarizer.ipynb +++ b/week1/community-contributions/01_webpage_summarizer.ipynb @@ -42,17 +42,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "ebf2fa36", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting selenium\n", + " Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)\n", + "Collecting webdriver-manager\n", + " Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: urllib3~=2.5.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from urllib3[socks]~=2.5.0->selenium) (2.5.0)\n", + "Collecting trio~=0.30.0 (from selenium)\n", + " Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)\n", + "Collecting trio-websocket~=0.12.2 (from selenium)\n", + " Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)\n", + "Requirement already satisfied: certifi>=2025.6.15 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (2025.7.14)\n", + "Requirement already satisfied: typing_extensions~=4.14.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (4.14.1)\n", + "Requirement already satisfied: websocket-client~=1.8.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n", + "Requirement already satisfied: attrs>=23.2.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (25.3.0)\n", + "Collecting sortedcontainers (from trio~=0.30.0->selenium)\n", + " Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: idna in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (3.10)\n", + "Collecting outcome (from trio~=0.30.0->selenium)\n", + " Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)\n", + "Requirement already satisfied: sniffio>=1.3.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (1.3.1)\n", + "Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)\n", + " Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]~=2.5.0->selenium)\n", + " Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: requests in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.4)\n", + "Requirement already satisfied: python-dotenv in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.1)\n", + "Requirement already satisfied: packaging in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (25.0)\n", + "Requirement already satisfied: h11<1,>=0.9.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.12.2->selenium) (0.16.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.2)\n", + "Downloading selenium-4.34.2-py3-none-any.whl (9.4 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0mm \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading trio-0.30.0-py3-none-any.whl (499 kB)\n", + "Downloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)\n", + "Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)\n", + "Downloading outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)\n", + "Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n", + "Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n", + "Installing collected packages: sortedcontainers, wsproto, pysocks, outcome, webdriver-manager, trio, trio-websocket, selenium\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [selenium]━━\u001b[0m \u001b[32m7/8\u001b[0m [selenium]-manager]\n", + "\u001b[1A\u001b[2KSuccessfully installed outcome-1.3.0.post0 pysocks-1.7.1 selenium-4.34.2 sortedcontainers-2.4.0 trio-0.30.0 trio-websocket-0.12.2 webdriver-manager-4.0.2 wsproto-1.2.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install selenium webdriver-manager" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "1dcf1d9d-c540-4900-b14e-ad36a28fc822", "metadata": {}, "outputs": [], @@ -92,10 +140,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "8598c299-05ca-492e-b085-6bcc2f7dda0d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ API key loaded successfully!\n" + ] + } + ], "source": [ "load_dotenv(override=True)\n", "api_key = os.getenv('OPENAI_API_KEY')\n", @@ -109,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "8098defb", "metadata": {}, "outputs": [], @@ -128,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c6fe5114", "metadata": {}, "outputs": [], @@ -233,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "02e3a673-a8a1-4101-a441-3816f7ab9e4d", "metadata": {}, "outputs": [], @@ -245,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "86bb80f9-9e7c-4825-985f-9b83fe50839f", "metadata": {}, "outputs": [], @@ -259,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "89998b18-77aa-4aaf-a137-f0d078d61f75", "metadata": {}, "outputs": [], @@ -335,7 +391,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -349,7 +405,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day1.ipynb b/week1/day1.ipynb index f492110..414c638 100644 --- a/week1/day1.ipynb +++ b/week1/day1.ipynb @@ -497,6 +497,26 @@ "display_summary(\"https://anthropic.com\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a904323-acd9-4c8e-9a17-70df76184590", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://rwothoromo.wordpress.com/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://openai.com\")" + ] + }, { "cell_type": "markdown", "id": "c951be1a-7f1b-448f-af1f-845978e47e2c", @@ -538,23 +558,55 @@ "source": [ "# Step 1: Create your prompts\n", "\n", - "system_prompt = \"something here\"\n", + "system_prompt = \"You are a professional assistant\"\n", "user_prompt = \"\"\"\n", - " Lots of text\n", - " Can be pasted here\n", + "Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n", + "\n", + "Dear Dev Contact,\n", + "\n", + "I hope this message finds you well.\n", + "I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n", + "\n", + "If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n", + "\n", + "Thank you very much for your time and consideration.\n", + "\n", + "Hello Job Seeker,\n", + "\n", + "I hope you are doing well.\n", + "\n", + "Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n", + "\n", + "Hello Dev Contact \n", + "Thank you for your feedback.\n", + "Yes my CV has PHP as one of my skill set. Can I share it with you?\n", + "\n", + "Dev Contact: They said \"It's late. Interviews were on Monday\"\n", + "\n", + "Hello Dev Contact\n", + "\n", + "Thanks for the update. When you hear of any opportunity please let me know.\n", + "\n", + "Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n", "\"\"\"\n", "\n", "# Step 2: Make the messages list\n", "\n", - "messages = [] # fill this in\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + "] # fill this in\n", "\n", "# Step 3: Call OpenAI\n", "\n", - "response =\n", + "response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages\n", + ")\n", "\n", "# Step 4: print the result\n", "\n", - "print(" + "print(response.choices[0].message.content)" ] }, { @@ -588,6 +640,34 @@ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988", "metadata": {}, "outputs": [], + "source": [ + "# To perform summaries using a model running locally\n", + "import ollama\n", + "\n", + "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n", + "# HEADERS = {\"Content-Type\": \"application/json\"}\n", + "MODEL = \"llama3.2\"\n", + "\n", + "\n", + "def summarize_with_local_model(url):\n", + " website = Website(url)\n", + " messages = messages_for(website)\n", + " response = ollama.chat(\n", + " model=MODEL,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed", + "metadata": {}, + "outputs": [], "source": [] } ], @@ -607,7 +687,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day2 EXERCISE.ipynb b/week1/day2 EXERCISE.ipynb index 89a383f..cde9d4a 100644 --- a/week1/day2 EXERCISE.ipynb +++ b/week1/day2 EXERCISE.ipynb @@ -118,7 +118,7 @@ "payload = {\n", " \"model\": MODEL,\n", " \"messages\": messages,\n", - " \"stream\": False\n", + " \"stream\": False # just get the results, don't stream them\n", " }" ] }, @@ -308,7 +308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/day5.ipynb b/week1/day5.ipynb index 5249ce8..39142ef 100644 --- a/week1/day5.ipynb +++ b/week1/day5.ipynb @@ -144,6 +144,15 @@ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n", " ]\n", "}\n", + "\"\"\"\n", + "link_system_prompt += \"And this example:\"\n", + "link_system_prompt += \"\"\"\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n", + " {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n", + " ]\n", + "}\n", "\"\"\"" ] }, @@ -213,6 +222,9 @@ "source": [ "# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n", "\n", + "# anthropic = Website(\"https://anthropic.com\")\n", + "# anthropic.links\n", + "# get_links(\"https://anthropic.com\")\n", "huggingface = Website(\"https://huggingface.co\")\n", "huggingface.links" ] @@ -272,15 +284,15 @@ "metadata": {}, "outputs": [], "source": [ - "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "Include details of company culture, customers and careers/jobs if you have the information.\"\n", + "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "# Include details of company culture, customers and careers/jobs if you have the information.\"\n", "\n", "# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n", "\n", - "# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", - "# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", - "# Include details of company culture, customers and careers/jobs if you have the information.\"\n" + "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n", + "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n", + "Include details of company culture, customers and careers/jobs if you have the information.\"\n" ] }, { @@ -293,6 +305,7 @@ "def get_brochure_user_prompt(company_name, url):\n", " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n", " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n", + " user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n", " user_prompt += get_all_details(url)\n", " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n", " return user_prompt" @@ -324,6 +337,28 @@ " ],\n", " )\n", " result = response.choices[0].message.content\n", + " # display(Markdown(result))\n", + " # print(result)\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0029e063-0c07-4712-82d9-536ec3579e80", + "metadata": {}, + "outputs": [], + "source": [ + "def translate_brochure(brochure, language):\n", + " system_prompt_for_language = \"You're an expert in \" + language + \". Translate the brochure!\"\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt_for_language},\n", + " {\"role\": \"user\", \"content\": brochure}\n", + " ],\n", + " )\n", + " result = response.choices[0].message.content\n", " display(Markdown(result))" ] }, @@ -337,6 +372,28 @@ "create_brochure(\"HuggingFace\", \"https://huggingface.co\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510", + "metadata": {}, + "outputs": [], + "source": [ + "brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n", + "# translate_brochure(brochure, \"Spanish\")\n", + "translate_brochure(brochure, \"Swahili\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34e03db6-61d0-4fc5-bf66-4f679b9befde", + "metadata": {}, + "outputs": [], + "source": [ + "create_brochure(\"Wabeh\", \"https://wabeh.com/\")" + ] + }, { "cell_type": "markdown", "id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18", @@ -501,7 +558,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/troubleshooting.ipynb b/week1/troubleshooting.ipynb index 23eca6f..d8cdf8f 100644 --- a/week1/troubleshooting.ipynb +++ b/week1/troubleshooting.ipynb @@ -40,10 +40,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d296f9b6-8de4-44db-b5f5-9b653dfd3d81", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected to the internet and can reach Google\n" + ] + } + ], "source": [ "import urllib.request\n", "\n", @@ -101,10 +109,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7c8c0bb3-0e94-466e-8d1a-4dfbaa014cbe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Virtualenv is active:\n", + "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", + "Environment Name: llms\n" + ] + } + ], "source": [ "# Some quick checks that your Conda environment or VirtualEnv is as expected\n", "# The Environment Name should be: llms\n", @@ -164,10 +182,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "6c78b7d9-1eea-412d-8751-3de20c0f6e2f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'openai'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# This import should work if your environment is active and dependencies are installed!\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" + ] + } + ], "source": [ "# This import should work if your environment is active and dependencies are installed!\n", "\n", @@ -201,10 +231,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "caa4837e-b970-4f89-aa9a-8aa793c754fd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".env file found.\n", + "SUCCESS! OPENAI_API_KEY found and it has the right prefix\n" + ] + } + ], "source": [ "from pathlib import Path\n", "\n", @@ -254,10 +293,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "ab9ea6ef-49ee-4899-a1c7-75a8bd9ac36b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is already a .env file - if you want me to create a new one, change the variable overwrite_if_already_exists to True above\n" + ] + } + ], "source": [ "# Only run this code in this cell if you want to have a .env file created for you!\n", "\n", @@ -371,10 +418,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "cccb58e7-6626-4033-9dc1-e7e3ff742f6b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'openai'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mdotenv\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_dotenv\n\u001b[1;32m 3\u001b[0m load_dotenv(override\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'" + ] + } + ], "source": [ "from openai import OpenAI\n", "from dotenv import load_dotenv\n", @@ -463,10 +522,110 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "248204f0-7bad-482a-b715-fb06a3553916", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting diagnostics at 2025-08-02 23:16:47\n", + "\n", + "===== System Information =====\n", + "Operating System: Darwin\n", + "MacOS Version: 10.16\n", + "Architecture: ('64bit', 'Mach-O')\n", + "Machine: x86_64\n", + "Processor: i386\n", + "Total RAM: 16.00 GB\n", + "Available RAM: 6.04 GB\n", + "Free Disk Space: 315.94 GB\n", + "\n", + "===== File System Information =====\n", + "Current Directory: /Users/elijahsmac/Desktop/code/llm/llm_engineering/week1\n", + "Write permission: OK\n", + "\n", + "Files in Current Directory:\n", + " - .ipynb_checkpoints\n", + " - Guide to Jupyter.ipynb\n", + " - Intermediate Python.ipynb\n", + " - __pycache__\n", + " - community-contributions\n", + " - day1.ipynb\n", + " - day2 EXERCISE.ipynb\n", + " - day5.ipynb\n", + " - diagnostics.py\n", + " - report.txt\n", + " - solutions\n", + " - troubleshooting.ipynb\n", + " - week1 EXERCISE.ipynb\n", + "\n", + "===== Git Repository Information =====\n", + "Git Repository Root: /Users/elijahsmac/Desktop/code/llm/llm_engineering\n", + "Current Commit: 3a042500d7db3c0e8cde89f836a60e6cd7ab9ba1\n", + "Remote Origin: git@github.com:ed-donner/llm_engineering.git\n", + "\n", + "===== Environment File Check =====\n", + ".env file exists at: /Users/elijahsmac/Desktop/code/llm/llm_engineering/.env\n", + "OPENAI_API_KEY found in .env file\n", + "\n", + "===== Anaconda Environment Check =====\n", + "No active Anaconda environment detected\n", + "\n", + "===== Virtualenv Check =====\n", + "Virtualenv is active:\n", + "Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n", + "Environment Name: llms\n", + "\n", + "Python Environment:\n", + "Python Version: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 11:09:21) [Clang 14.0.6 ]\n", + "Python Executable: /Users/elijahsmac/anaconda3/bin/python\n", + "\n", + "Required Package Versions:\n", + "ERROR: Required package 'openai' is not installed\n", + "python-dotenv: 1.1.0\n", + "requests: 2.32.3\n", + "ERROR: Required package 'gradio' is not installed\n", + "ERROR: Required package 'transformers' is not installed\n", + "\n", + "===== Network Connectivity Check =====\n", + "SSL Version: OpenSSL 3.0.16 11 Feb 2025\n", + "ERROR: Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", + "\n", + "===== Environment Variables Check =====\n", + "\n", + "PYTHONPATH is not set.\n", + "\n", + "Python sys.path:\n", + " - /Users/elijahsmac/anaconda3/lib/python313.zip\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/lib-dynload\n", + " - \n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/aeosa\n", + " - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/setuptools/_vendor\n", + "OPENAI_API_KEY is set after calling load_dotenv()\n", + "\n", + "===== Additional Diagnostics =====\n", + "Temp directory is writable: /var/folders/_1/d6xg9lvd4lb714ry78xpc10w0000gn/T\n", + "\n", + "===== Errors Found =====\n", + "The following critical issues were detected. Please address them before proceeding:\n", + "- Required package 'openai' is not installed\n", + "- Required package 'gradio' is not installed\n", + "- Required package 'transformers' is not installed\n", + "- Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n", + "\n", + "\n", + "Completed diagnostics at 2025-08-02 23:16:47\n", + "\n", + "\n", + "Please send these diagnostics to me at ed@edwarddonner.com\n", + "Either copy & paste the above output into an email, or attach the file report.txt that has been created in this directory.\n" + ] + } + ], "source": [ "# Run my diagnostics report to collect key information for debugging\n", "# Please email me the results. Either copy & paste the output, or attach the file report.txt\n", @@ -501,7 +660,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/week1/week1 EXERCISE.ipynb b/week1/week1 EXERCISE.ipynb index f3486fe..2f79587 100644 --- a/week1/week1 EXERCISE.ipynb +++ b/week1/week1 EXERCISE.ipynb @@ -18,7 +18,13 @@ "metadata": {}, "outputs": [], "source": [ - "# imports" + "# imports\n", + "import os, re, requests, json, ollama\n", + "from typing import List\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" ] }, { @@ -41,7 +47,27 @@ "metadata": {}, "outputs": [], "source": [ - "# set up environment" + "# set up environment\n", + "\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " \"\"\"\n", + " Create this Website object from the given url using the BeautifulSoup library\n", + " \"\"\"\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + "\n", + "openai = OpenAI()\n" ] }, { @@ -53,10 +79,68 @@ "source": [ "# here is the question; type over this to ask something new\n", "\n", + "# question = \"\"\"\n", + "# Please explain what this code does and why:\n", + "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", + "# \"\"\"\n", + "\n", "question = \"\"\"\n", - "Please explain what this code does and why:\n", - "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n", - "\"\"\"" + "How good at Software Development is Elijah Rwothoromo? \\\n", + "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n", + "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \\\n", + "What can we learn from him?\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9", + "metadata": {}, + "outputs": [], + "source": [ + "# Process URLs in the question to improve the prompt\n", + "\n", + "# Extract all URLs from the question string using regular expressions\n", + "urls = re.findall(r'https?://[^\\s)]+', question)\n", + "\n", + "# Fetch the content for each URL using the Website class\n", + "scraped_content = []\n", + "for url in urls:\n", + " print(f\"Scraping: {url}\")\n", + " try:\n", + " site = Website(url)\n", + " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n", + " scraped_content.append(content)\n", + " except Exception as e:\n", + " print(f\"Could not scrape {url}: {e}\")\n", + " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n", + "\n", + "# Combine all the scraped text into one string\n", + "all_scraped_text = \"\\n\".join(scraped_content)\n", + "\n", + "# Update the question with the scraped content\n", + "augmented_question = f\"\"\"\n", + "Based on the following information, please answer the user's original question.\n", + "\n", + "--- TEXT FROM WEBSITES ---\n", + "{all_scraped_text}\n", + "--- END TEXT FROM WEBSITES ---\n", + "\n", + "--- ORIGINAL QUESTION ---\n", + "{question}\n", + "\"\"\"\n", + "\n", + "# system prompt to be more accurate for AI to just analyze the provided text.\n", + "system_prompt = \"You are an expert assistant. \\\n", + "Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n", + "Provides a short summary, ignoring text that might be navigation-related.\"\n", + "\n", + "# Create the messages list with the new augmented prompt\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": augmented_question},\n", + "]\n" ] }, { @@ -66,7 +150,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Get gpt-4o-mini to answer, with streaming" + "# Get gpt-4o-mini to answer, with streaming\n", + "\n", + "def get_gpt_response(question):\n", + " # return response.choices[0].message.content\n", + "\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL_GPT,\n", + " messages=messages,\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " response += chunk.choices[0].delta.content or ''\n", + " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n", + " update_display(Markdown(response), display_id=display_handle.display_id)\n", + "\n", + "get_gpt_response(question)" ] }, { @@ -76,8 +178,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Get Llama 3.2 to answer" + "# Get Llama 3.2 to answer\n", + "def get_llama_response(question):\n", + " response = ollama.chat(\n", + " model=MODEL_LLAMA,\n", + " messages=messages,\n", + " stream=False # just get the results, don't stream them\n", + " )\n", + " return response['message']['content']\n", + "\n", + "display(Markdown(get_llama_response(question)))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -96,7 +215,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.7" } }, "nbformat": 4,