Week 1 exercise

This commit is contained in:
Elijah Rwothoromo
2025-08-05 20:42:50 +03:00
parent 3a042500d7
commit 9da9692a9b
7 changed files with 649 additions and 79 deletions

View File

@@ -32,10 +32,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "33d37cd8-55c9-4e03-868c-34aa9cab2c80",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Click anywhere in this cell and press Shift + Return\n",
"\n",
@@ -54,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "585eb9c1-85ee-4c27-8dc2-b4d8d022eda0",
"metadata": {},
"outputs": [],
@@ -66,10 +77,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "07792faa-761d-46cb-b9b7-2bbf70bb1628",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'bananas'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The result of the last statement is shown after you run it\n",
"\n",
@@ -78,10 +100,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "a067d2b1-53d5-4aeb-8a3c-574d39ff654a",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is bananas\n"
]
}
],
"source": [
"# Use the variable\n",
"\n",
@@ -90,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "4c5a4e60-b7f4-4953-9e80-6d84ba4664ad",
"metadata": {},
"outputs": [],
@@ -116,10 +146,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "8e5ec81d-7c5b-4025-bd2e-468d67b581b6",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is anything but bananas\n"
]
}
],
"source": [
"# Then run this cell twice, and see if you understand what's going on\n",
"\n",
@@ -144,10 +182,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5",
"execution_count": 7,
"id": "ce258424-40c3-49a7-9462-e6fa25014b03",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My favorite fruit is apples\n"
]
}
],
"source": [
"print(f\"My favorite fruit is {favorite_fruit}\")"
]
@@ -165,10 +211,12 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ce258424-40c3-49a7-9462-e6fa25014b03",
"id": "84b1e410-5eda-4e2c-97ce-4eebcff816c5",
"metadata": {},
"outputs": [],
"source": []
"source": [
"print(f\"My favorite fruit is {favorite_fruit} - ok\")"
]
},
{
"cell_type": "markdown",
@@ -221,10 +269,25 @@
"Click in the cell and press the Bin icon if you want to remove it."
]
},
{
"cell_type": "markdown",
"id": "b3b2d1ff-5d2c-47a9-9c1b-90a0cfb89dd9",
"metadata": {},
"source": [
"# This is a heading\n",
"## This is a sub-head\n",
"### And a sub-sub-head\n",
"\n",
"I like Jupyter Lab because it's\n",
"- Easy\n",
"- Flexible\n",
"- Satisfying"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1586320-c90f-4f22-8b39-df6865484950",
"id": "a365b651-3a34-40ed-8def-df1e6a484b43",
"metadata": {},
"outputs": [],
"source": []
@@ -245,10 +308,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "82042fc5-a907-4381-a4b8-eb9386df19cd",
"execution_count": 1,
"id": "b1b303d9-ce47-4cee-85e9-6416abca7d21",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Guide to Jupyter.ipynb day2 EXERCISE.ipynb troubleshooting.ipynb\n",
"Intermediate Python.ipynb day5.ipynb week1 EXERCISE.ipynb\n",
"\u001b[34mcommunity-contributions\u001b[m\u001b[m diagnostics.py\n",
"day1.ipynb \u001b[34msolutions\u001b[m\u001b[m\n"
]
}
],
"source": [
"# list the current directory\n",
"\n",
@@ -258,13 +332,13 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4fc3e3da-8a55-40cc-9706-48bf12a0e20e",
"id": "18685382-3768-4e00-817b-cc69dd1fb531",
"metadata": {},
"outputs": [],
"source": [
"# ping cnn.com - press the stop button in the toolbar when you're bored\n",
"# ping a website\n",
"\n",
"!ping cnn.com"
"!ping google.com"
]
},
{
@@ -295,7 +369,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "2646a4e5-3c23-4aee-a34d-d623815187d2",
"metadata": {},
"outputs": [],
@@ -313,10 +387,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "6e96be3d-fa82-42a3-a8aa-b81dd20563a5",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████| 1000/1000 [00:11<00:00, 86.00it/s]\n"
]
}
],
"source": [
"# And now, with a nice little progress bar:\n",
"\n",
@@ -331,10 +413,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "63c788dd-4618-4bb4-a5ce-204411a38ade",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/markdown": [
"# This is a big heading!\n",
"\n",
"- And this is a bullet-point\n",
"- So is this\n",
"- Me, too!"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# On a different topic, here's a useful way to print output in markdown\n",
"\n",
@@ -372,7 +471,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -42,17 +42,65 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "ebf2fa36",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting selenium\n",
" Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)\n",
"Collecting webdriver-manager\n",
" Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)\n",
"Requirement already satisfied: urllib3~=2.5.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from urllib3[socks]~=2.5.0->selenium) (2.5.0)\n",
"Collecting trio~=0.30.0 (from selenium)\n",
" Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)\n",
"Collecting trio-websocket~=0.12.2 (from selenium)\n",
" Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)\n",
"Requirement already satisfied: certifi>=2025.6.15 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (2025.7.14)\n",
"Requirement already satisfied: typing_extensions~=4.14.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (4.14.1)\n",
"Requirement already satisfied: websocket-client~=1.8.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n",
"Requirement already satisfied: attrs>=23.2.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (25.3.0)\n",
"Collecting sortedcontainers (from trio~=0.30.0->selenium)\n",
" Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)\n",
"Requirement already satisfied: idna in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (3.10)\n",
"Collecting outcome (from trio~=0.30.0->selenium)\n",
" Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)\n",
"Requirement already satisfied: sniffio>=1.3.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from trio~=0.30.0->selenium) (1.3.1)\n",
"Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)\n",
" Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)\n",
"Collecting pysocks!=1.5.7,<2.0,>=1.5.6 (from urllib3[socks]~=2.5.0->selenium)\n",
" Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)\n",
"Requirement already satisfied: requests in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.4)\n",
"Requirement already satisfied: python-dotenv in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.1)\n",
"Requirement already satisfied: packaging in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from webdriver-manager) (25.0)\n",
"Requirement already satisfied: h11<1,>=0.9.0 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.12.2->selenium) (0.16.0)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.2)\n",
"Downloading selenium-4.34.2-py3-none-any.whl (9.4 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0mm \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading trio-0.30.0-py3-none-any.whl (499 kB)\n",
"Downloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)\n",
"Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n",
"Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)\n",
"Downloading outcome-1.3.0.post0-py2.py3-none-any.whl (10 kB)\n",
"Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)\n",
"Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n",
"Installing collected packages: sortedcontainers, wsproto, pysocks, outcome, webdriver-manager, trio, trio-websocket, selenium\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [selenium]━━\u001b[0m \u001b[32m7/8\u001b[0m [selenium]-manager]\n",
"\u001b[1A\u001b[2KSuccessfully installed outcome-1.3.0.post0 pysocks-1.7.1 selenium-4.34.2 sortedcontainers-2.4.0 trio-0.30.0 trio-websocket-0.12.2 webdriver-manager-4.0.2 wsproto-1.2.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install selenium webdriver-manager"
]
},
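The notebook installs selenium and webdriver-manager above; the next cell (1dcf1d9d) presumably configures the browser, but its source is truncated in this diff. A minimal sketch of the usual pattern, where the headless flag and the URL are assumptions for illustration rather than the notebook's exact code:

```python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Resolve a matching chromedriver automatically and start headless Chrome.
options = Options()
options.add_argument("--headless=new")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

driver.get("https://example.com")   # placeholder URL
html = driver.page_source           # fully rendered HTML, including JS-generated content
driver.quit()
```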
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "1dcf1d9d-c540-4900-b14e-ad36a28fc822",
"metadata": {},
"outputs": [],
@@ -92,10 +140,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "8598c299-05ca-492e-b085-6bcc2f7dda0d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ API key loaded successfully!\n"
]
}
],
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
@@ -109,7 +165,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "8098defb",
"metadata": {},
"outputs": [],
@@ -128,7 +184,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "c6fe5114",
"metadata": {},
"outputs": [],
@@ -233,7 +289,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "02e3a673-a8a1-4101-a441-3816f7ab9e4d",
"metadata": {},
"outputs": [],
@@ -245,7 +301,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "86bb80f9-9e7c-4825-985f-9b83fe50839f",
"metadata": {},
"outputs": [],
@@ -259,7 +315,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "89998b18-77aa-4aaf-a137-f0d078d61f75",
"metadata": {},
"outputs": [],
@@ -335,7 +391,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -349,7 +405,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -497,6 +497,26 @@
"display_summary(\"https://anthropic.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a904323-acd9-4c8e-9a17-70df76184590",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://rwothoromo.wordpress.com/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a66c9fe8-c26a-49dd-9bc4-9efffc638f95",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://openai.com\")"
]
},
{
"cell_type": "markdown",
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
@@ -538,23 +558,55 @@
"source": [
"# Step 1: Create your prompts\n",
"\n",
"system_prompt = \"something here\"\n",
"system_prompt = \"You are a professional assistant\"\n",
"user_prompt = \"\"\"\n",
" Lots of text\n",
" Can be pasted here\n",
"Review this conversation and provide a comprehensive summary. Also, suggest how much better the converation could have gone:\n",
"\n",
"Dear Dev Contact,\n",
"\n",
"I hope this message finds you well.\n",
"I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n",
"\n",
"If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n",
"\n",
"Thank you very much for your time and consideration.\n",
"\n",
"Hello Job Seeker,\n",
"\n",
"I hope you are doing well.\n",
"\n",
"Dev Contact: The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n",
"\n",
"Hello Dev Contact \n",
"Thank you for your feedback.\n",
"Yes my CV has PHP as one of my skill set. Can I share it with you?\n",
"\n",
"Dev Contact: They said \"It's late. Interviews were on Monday\"\n",
"\n",
"Hello Dev Contact\n",
"\n",
"Thanks for the update. When you hear of any opportunity please let me know.\n",
"\n",
"Dev Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [] # fill this in\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt},\n",
"] # fill this in\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response =\n",
"response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages\n",
")\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print("
"print(response.choices[0].message.content)"
]
},
{
@@ -588,6 +640,34 @@
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": [
"# To perform summaries using a model running locally\n",
"import ollama\n",
"\n",
"# OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"# HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\"\n",
"\n",
"\n",
"def summarize_with_local_model(url):\n",
" website = Website(url)\n",
" messages = messages_for(website)\n",
" response = ollama.chat(\n",
" model=MODEL,\n",
" messages=messages,\n",
" stream=False # just get the results, don't stream them\n",
" )\n",
" return response['message']['content']\n",
"\n",
"display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))"
]
},
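The cell above deliberately sets stream=False. For completeness, the same ollama client also supports streaming; this sketch assumes the notebook's Website class and messages_for helper from earlier cells, and is an optional variant rather than part of the commit:

```python
def stream_with_local_model(url):
    # With stream=True, ollama.chat returns an iterator of partial chunks.
    website = Website(url)
    for chunk in ollama.chat(model=MODEL, messages=messages_for(website), stream=True):
        print(chunk["message"]["content"], end="", flush=True)
```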
{
"cell_type": "code",
"execution_count": null,
"id": "e76cbf31-2a82-40b8-b2e7-e2ceae7483ed",
"metadata": {},
"outputs": [],
"source": []
}
],
@@ -607,7 +687,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -118,7 +118,7 @@
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" \"stream\": False # just get the results, don't stream them\n",
" }"
]
},
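For context, this payload is the body of a POST to the local Ollama chat endpoint. A minimal sketch of the call, assuming OLLAMA_API and HEADERS are defined in earlier, unchanged cells (conventionally http://localhost:11434/api/chat and a JSON content-type header):

```python
import requests

# Send the payload to the local Ollama server and read the single (non-streamed) reply.
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
print(response.json()["message"]["content"])
```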
@@ -308,7 +308,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -144,6 +144,15 @@
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
"}\n",
"\"\"\"\n",
"link_system_prompt += \"And this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"for-you page\", \"url\": \"https://full.url/goes/here/services\"},\n",
" {\"type\": \"speak-to-a-human page\", \"url\": \"https://another.full.url/contact-us\"}\n",
" ]\n",
"}\n",
"\"\"\""
]
},
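These few-shot examples extend link_system_prompt, which the notebook's get_links step feeds to the model alongside a page's links. A sketch of what such a call typically looks like; Website, openai, and MODEL come from earlier unchanged cells, and the function name and user-prompt wording here are illustrative rather than the notebook's exact code:

```python
import json

def get_relevant_links(url):
    website = Website(url)
    user_prompt = f"Here is the list of links on {url} - decide which are relevant:\n"
    user_prompt += "\n".join(website.links)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},  # request strict JSON, matching the examples
    )
    return json.loads(response.choices[0].message.content)
```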
@@ -213,6 +222,9 @@
"source": [
"# Anthropic has made their site harder to scrape, so I'm using HuggingFace..\n",
"\n",
"# anthropic = Website(\"https://anthropic.com\")\n",
"# anthropic.links\n",
"# get_links(\"https://anthropic.com\")\n",
"huggingface = Website(\"https://huggingface.co\")\n",
"huggingface.links"
]
@@ -272,15 +284,15 @@
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\"\n",
"\n",
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\"\n"
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\"\n"
]
},
{
@@ -293,6 +305,7 @@
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += f\"Keep the details brief or concise, factoring in that they would be printed on a simple hand-out flyer.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
@@ -324,6 +337,28 @@
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" # display(Markdown(result))\n",
" # print(result)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0029e063-0c07-4712-82d9-536ec3579e80",
"metadata": {},
"outputs": [],
"source": [
"def translate_brochure(brochure, language):\n",
" system_prompt_for_language = \"You're an expert in \" + language + \". Translate the brochure!\"\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt_for_language},\n",
" {\"role\": \"user\", \"content\": brochure}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
@@ -337,6 +372,28 @@
"create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8371bf5-c4c0-4e52-9a2a-066d994b0510",
"metadata": {},
"outputs": [],
"source": [
"brochure = create_brochure(\"Paint and Sip Uganda\", \"https://paintandsipuganda.com/\")\n",
"# translate_brochure(brochure, \"Spanish\")\n",
"translate_brochure(brochure, \"Swahili\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34e03db6-61d0-4fc5-bf66-4f679b9befde",
"metadata": {},
"outputs": [],
"source": [
"create_brochure(\"Wabeh\", \"https://wabeh.com/\")"
]
},
{
"cell_type": "markdown",
"id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18",
@@ -501,7 +558,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -40,10 +40,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "d296f9b6-8de4-44db-b5f5-9b653dfd3d81",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Connected to the internet and can reach Google\n"
]
}
],
"source": [
"import urllib.request\n",
"\n",
@@ -101,10 +109,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "7c8c0bb3-0e94-466e-8d1a-4dfbaa014cbe",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Virtualenv is active:\n",
"Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n",
"Environment Name: llms\n"
]
}
],
"source": [
"# Some quick checks that your Conda environment or VirtualEnv is as expected\n",
"# The Environment Name should be: llms\n",
@@ -164,10 +182,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "6c78b7d9-1eea-412d-8751-3de20c0f6e2f",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'openai'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# This import should work if your environment is active and dependencies are installed!\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'"
]
}
],
"source": [
"# This import should work if your environment is active and dependencies are installed!\n",
"\n",
@@ -201,10 +231,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "caa4837e-b970-4f89-aa9a-8aa793c754fd",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
".env file found.\n",
"SUCCESS! OPENAI_API_KEY found and it has the right prefix\n"
]
}
],
"source": [
"from pathlib import Path\n",
"\n",
@@ -254,10 +293,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "ab9ea6ef-49ee-4899-a1c7-75a8bd9ac36b",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There is already a .env file - if you want me to create a new one, change the variable overwrite_if_already_exists to True above\n"
]
}
],
"source": [
"# Only run this code in this cell if you want to have a .env file created for you!\n",
"\n",
@@ -371,10 +418,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "cccb58e7-6626-4033-9dc1-e7e3ff742f6b",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'openai'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopenai\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mdotenv\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_dotenv\n\u001b[1;32m 3\u001b[0m load_dotenv(override\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openai'"
]
}
],
"source": [
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
@@ -463,10 +522,110 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "248204f0-7bad-482a-b715-fb06a3553916",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting diagnostics at 2025-08-02 23:16:47\n",
"\n",
"===== System Information =====\n",
"Operating System: Darwin\n",
"MacOS Version: 10.16\n",
"Architecture: ('64bit', 'Mach-O')\n",
"Machine: x86_64\n",
"Processor: i386\n",
"Total RAM: 16.00 GB\n",
"Available RAM: 6.04 GB\n",
"Free Disk Space: 315.94 GB\n",
"\n",
"===== File System Information =====\n",
"Current Directory: /Users/elijahsmac/Desktop/code/llm/llm_engineering/week1\n",
"Write permission: OK\n",
"\n",
"Files in Current Directory:\n",
" - .ipynb_checkpoints\n",
" - Guide to Jupyter.ipynb\n",
" - Intermediate Python.ipynb\n",
" - __pycache__\n",
" - community-contributions\n",
" - day1.ipynb\n",
" - day2 EXERCISE.ipynb\n",
" - day5.ipynb\n",
" - diagnostics.py\n",
" - report.txt\n",
" - solutions\n",
" - troubleshooting.ipynb\n",
" - week1 EXERCISE.ipynb\n",
"\n",
"===== Git Repository Information =====\n",
"Git Repository Root: /Users/elijahsmac/Desktop/code/llm/llm_engineering\n",
"Current Commit: 3a042500d7db3c0e8cde89f836a60e6cd7ab9ba1\n",
"Remote Origin: git@github.com:ed-donner/llm_engineering.git\n",
"\n",
"===== Environment File Check =====\n",
".env file exists at: /Users/elijahsmac/Desktop/code/llm/llm_engineering/.env\n",
"OPENAI_API_KEY found in .env file\n",
"\n",
"===== Anaconda Environment Check =====\n",
"No active Anaconda environment detected\n",
"\n",
"===== Virtualenv Check =====\n",
"Virtualenv is active:\n",
"Environment Path: /Users/elijahsmac/Desktop/code/llm/llm_engineering/llms\n",
"Environment Name: llms\n",
"\n",
"Python Environment:\n",
"Python Version: 3.13.5 | packaged by Anaconda, Inc. | (main, Jun 12 2025, 11:09:21) [Clang 14.0.6 ]\n",
"Python Executable: /Users/elijahsmac/anaconda3/bin/python\n",
"\n",
"Required Package Versions:\n",
"ERROR: Required package 'openai' is not installed\n",
"python-dotenv: 1.1.0\n",
"requests: 2.32.3\n",
"ERROR: Required package 'gradio' is not installed\n",
"ERROR: Required package 'transformers' is not installed\n",
"\n",
"===== Network Connectivity Check =====\n",
"SSL Version: OpenSSL 3.0.16 11 Feb 2025\n",
"ERROR: Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n",
"\n",
"===== Environment Variables Check =====\n",
"\n",
"PYTHONPATH is not set.\n",
"\n",
"Python sys.path:\n",
" - /Users/elijahsmac/anaconda3/lib/python313.zip\n",
" - /Users/elijahsmac/anaconda3/lib/python3.13\n",
" - /Users/elijahsmac/anaconda3/lib/python3.13/lib-dynload\n",
" - \n",
" - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages\n",
" - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/aeosa\n",
" - /Users/elijahsmac/anaconda3/lib/python3.13/site-packages/setuptools/_vendor\n",
"OPENAI_API_KEY is set after calling load_dotenv()\n",
"\n",
"===== Additional Diagnostics =====\n",
"Temp directory is writable: /var/folders/_1/d6xg9lvd4lb714ry78xpc10w0000gn/T\n",
"\n",
"===== Errors Found =====\n",
"The following critical issues were detected. Please address them before proceeding:\n",
"- Required package 'openai' is not installed\n",
"- Required package 'gradio' is not installed\n",
"- Required package 'transformers' is not installed\n",
"- Required packages are not installed. Please install them using 'pip install requests speedtest-cli'\n",
"\n",
"\n",
"Completed diagnostics at 2025-08-02 23:16:47\n",
"\n",
"\n",
"Please send these diagnostics to me at ed@edwarddonner.com\n",
"Either copy & paste the above output into an email, or attach the file report.txt that has been created in this directory.\n"
]
}
],
"source": [
"# Run my diagnostics report to collect key information for debugging\n",
"# Please email me the results. Either copy & paste the output, or attach the file report.txt\n",
@@ -501,7 +660,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.11.7"
}
},
"nbformat": 4,

View File

@@ -18,7 +18,13 @@
"metadata": {},
"outputs": [],
"source": [
"# imports"
"# imports\n",
"import os, re, requests, json, ollama\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
@@ -41,7 +47,27 @@
"metadata": {},
"outputs": [],
"source": [
"# set up environment"
"# set up environment\n",
"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"\n",
"openai = OpenAI()\n"
]
},
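A quick usage sketch for the Website class defined above; the URL is just an example:

```python
site = Website("https://example.com")
print(site.title)        # page <title>, or "No title found"
print(site.text[:500])   # first 500 characters of the cleaned body text
```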
{
@@ -53,10 +79,68 @@
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"# question = \"\"\"\n",
"# Please explain what this code does and why:\n",
"# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"# \"\"\"\n",
"\n",
"question = \"\"\"\n",
"Please explain what this code does and why:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
"How good at Software Development is Elijah Rwothoromo? \\\n",
"He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n",
"He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \\\n",
"What can we learn from him?\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e14fd3a1-0aca-4794-a0e0-57458e111fc9",
"metadata": {},
"outputs": [],
"source": [
"# Process URLs in the question to improve the prompt\n",
"\n",
"# Extract all URLs from the question string using regular expressions\n",
"urls = re.findall(r'https?://[^\\s)]+', question)\n",
"\n",
"# Fetch the content for each URL using the Website class\n",
"scraped_content = []\n",
"for url in urls:\n",
" print(f\"Scraping: {url}\")\n",
" try:\n",
" site = Website(url)\n",
" content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\" # delimiter ---\n",
" scraped_content.append(content)\n",
" except Exception as e:\n",
" print(f\"Could not scrape {url}: {e}\")\n",
" scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n",
"\n",
"# Combine all the scraped text into one string\n",
"all_scraped_text = \"\\n\".join(scraped_content)\n",
"\n",
"# Update the question with the scraped content\n",
"augmented_question = f\"\"\"\n",
"Based on the following information, please answer the user's original question.\n",
"\n",
"--- TEXT FROM WEBSITES ---\n",
"{all_scraped_text}\n",
"--- END TEXT FROM WEBSITES ---\n",
"\n",
"--- ORIGINAL QUESTION ---\n",
"{question}\n",
"\"\"\"\n",
"\n",
"# system prompt to be more accurate for AI to just analyze the provided text.\n",
"system_prompt = \"You are an expert assistant. \\\n",
"Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n",
"Provides a short summary, ignoring text that might be navigation-related.\"\n",
"\n",
"# Create the messages list with the new augmented prompt\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": augmented_question},\n",
"]\n"
]
},
{
@@ -66,7 +150,25 @@
"metadata": {},
"outputs": [],
"source": [
"# Get gpt-4o-mini to answer, with streaming"
"# Get gpt-4o-mini to answer, with streaming\n",
"\n",
"def get_gpt_response(question):\n",
" # return response.choices[0].message.content\n",
"\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL_GPT,\n",
" messages=messages,\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n",
"\n",
"get_gpt_response(question)"
]
},
{
@@ -76,8 +178,25 @@
"metadata": {},
"outputs": [],
"source": [
"# Get Llama 3.2 to answer"
"# Get Llama 3.2 to answer\n",
"def get_llama_response(question):\n",
" response = ollama.chat(\n",
" model=MODEL_LLAMA,\n",
" messages=messages,\n",
" stream=False # just get the results, don't stream them\n",
" )\n",
" return response['message']['content']\n",
"\n",
"display(Markdown(get_llama_response(question)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa1e9987-7b6d-49c1-9a81-b1a92aceea72",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -96,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.11.7"
}
},
"nbformat": 4,