From 73836686490c14c8fd2941f5001bb4fa9ed2eec4 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Oct 2025 12:36:16 +0300 Subject: [PATCH] Complete Week 1 assignments: Day 1-5 + Exercise (community contribution) --- ...day1_kenyan_legal_research_assistant.ipynb | 344 ++++++++++++++ .../day2_ollama_llama32_web_summary.ipynb | 230 ++++++++++ .../day4_tokenization_cost_chunking.ipynb | 240 ++++++++++ .../day5_business_brochure_generator.ipynb | 418 ++++++++++++++++++ ...exercise_technical_question_answerer.ipynb | 295 ++++++++++++ 5 files changed, 1527 insertions(+) create mode 100644 week1/community-contributions/week1-assignment-Joshua/day1_kenyan_legal_research_assistant.ipynb create mode 100644 week1/community-contributions/week1-assignment-Joshua/day2_ollama_llama32_web_summary.ipynb create mode 100644 week1/community-contributions/week1-assignment-Joshua/day4_tokenization_cost_chunking.ipynb create mode 100644 week1/community-contributions/week1-assignment-Joshua/day5_business_brochure_generator.ipynb create mode 100644 week1/community-contributions/week1-assignment-Joshua/week1_exercise_technical_question_answerer.ipynb diff --git a/week1/community-contributions/week1-assignment-Joshua/day1_kenyan_legal_research_assistant.ipynb b/week1/community-contributions/week1-assignment-Joshua/day1_kenyan_legal_research_assistant.ipynb new file mode 100644 index 0000000..688fb64 --- /dev/null +++ b/week1/community-contributions/week1-assignment-Joshua/day1_kenyan_legal_research_assistant.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 1 Day 1 - Kenyan Legal Research Assistant (Community Contribution)\n", + "\n", + "This notebook implements a legal research assistant focused on Kenyan law, following the course Day 1 structure and the community contribution format.\n", + "\n", + "- Reads API key from environment via `.env`\n", + "- Outputs valid Markdown\n", + "- Cites authoritative sources from 
`https://new.kenyalaw.org/`\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key found and looks good so far!\n" + ] + } + ], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧠 Step 1: Define the system prompt with Markdown formatting" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# === System Prompt for Kenyan Legal Research Assistant (Markdown Output) ===\n", + "\n", + "SYSTEM_PROMPT = \"\"\"\n", + "You are a meticulous Legal Research Assistant specialized in Kenyan law.\n", + "\n", + "# Role\n", + "Provide in-depth, citation-backed research strictly based on:\n", + "- The 
Constitution of Kenya (2010)\n", + "- Acts of Parliament and subsidiary legislation\n", + "- Case law from all levels of Kenyan courts\n", + "- Practice directions, Gazette notices, and policy circulars\n", + "\n", + "Prefer authoritative sources from https://new.kenyalaw.org/.\n", + "\n", + "# Formatting Rules\n", + "- **All responses must be written in valid Markdown.**\n", + "- Use headings (#, ##, ###) for structure.\n", + "- Use bullet points, bold text, and links for clarity.\n", + "- Format all citations as Markdown links to Kenya Law pages.\n", + "\n", + "# Research Principles\n", + "1. **Accuracy First:** Only use verified information from Kenyan legal sources.\n", + "2. **Citations Mandatory:**\n", + " - **Cases:** *Case Name v Case Name* [Year] eKLR β€” include paragraph pinpoints and working URL.\n", + " - **Statutes:** *Act name*, section/subsection, amendment year, and link.\n", + " - **Constitution:** Article/Clause (and sub-article) plus URL.\n", + "3. **Currency:** Indicate if the law or case has been amended, repealed, or overturned.\n", + "4. **Precedence:** Prefer Supreme Court > Court of Appeal > High Court. Note persuasive vs binding authority.\n", + "5. **No Fabrication:** If uncertain or source unavailable, state β€œSource unavailable on Kenya Law.”\n", + "6. 
**Comparative Law:** Mention only if explicitly requested, and label as β€œComparative Reference”.\n", + "\n", + "# Response Structure\n", + "Your output must follow this structure:\n", + "## Issues\n", + "List the legal questions.\n", + "## Law\n", + "Summarize relevant principles, cases, and statutes (with citations).\n", + "## Application\n", + "Apply the legal principles to the stated facts.\n", + "## Counter-Arguments\n", + "Present potential opposing interpretations.\n", + "## Conclusion\n", + "Summarize the likely legal position.\n", + "## Table of Authorities\n", + "Provide a list of all cases, statutes, and other references used.\n", + "\n", + "# Writing Style\n", + "- Use plain, professional Kenyan legal English.\n", + "- Always include working links to cited Kenya Law pages.\n", + "- Maintain objectivity β€” do **not** provide personal or client-specific legal advice.\n", + "\n", + "# Disclaimer\n", + "This output is for **research and educational use only**. It is **not legal advice**.\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧩 Step 2: Connect it to your OpenAI call and render Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from openai import OpenAI\n", + "from IPython.display import Markdown, display\n", + "\n", + "#If openai is not defined (define it here)\n", + "#eg openai = OpenAI()\n", + "\n", + "def get_legal_research(topic, facts, questions):\n", + " user_prompt = f\"\"\"\n", + " Task: Deep legal research in Kenyan law.\n", + " Topic: {topic}\n", + "\n", + " Facts:\n", + " {facts}\n", + "\n", + " Questions:\n", + " {questions}\n", + "\n", + " Constraints:\n", + " - Cite every legal proposition with paragraph/section and working Kenya Law link.\n", + " - Note amendments or recent cases.\n", + " - Follow the Issues-Law-Application-Counterarguments-Conclusion structure.\n", + " - Format all output in Markdown.\n", + " 
\"\"\"\n", + "\n", + " response = openai.responses.create(\n", + " model=\"gpt-4.1-mini\",\n", + " input=[\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " )\n", + "\n", + " # Render as Markdown\n", + " display(Markdown(response.output_text))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧾 Example usage" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "## Issues\n", + "1. What is the legal test for conducting a warrantless search of a vehicle in Kenya? \n", + "2. How has Article 31 of the Constitution of Kenya (2010) on the right to privacy been interpreted by Kenyan courts?\n", + "\n", + "---\n", + "\n", + "## Law\n", + "\n", + "### Article 31 of the Constitution of Kenya (2010) β€” Right to Privacy \n", + "- **Article 31(1)** provides: \n", + " > \"Every person has the right to privacy, which includes the right not to haveβ€” \n", + " > (a) their person, home or property searched; \n", + " > (b) their possessions seized; \n", + " > (c) information relating to their family or private affairs unnecessarily required or revealed; or \n", + " > (d) the privacy of their communications infringed.\" \n", + "- This right is subject to **Article 31(2)** which allows limitations if they are: \n", + " - Prescribed by law, \n", + " - Reasonable, and \n", + " - Justifiable in an open and democratic society.\n", + "\n", + "- [Constitution of Kenya, Article 31](https://kenyalaw.org/kl/fileadmin/pdfdownloads/Constitution_of_Kenya__2010.pdf#page=30) (p.30)\n", + "\n", + "---\n", + "\n", + "### Legal Test for Warrantless Searches in Kenya (Vehicle Searches)\n", + "\n", + "- **Section 59 of the Criminal Procedure Code (CPC), Cap. 75** deals broadly with searches and seizures but does not provide an explicit test for warrantless vehicle searches. 
Instead, the search powers are generally framed around authorisations by law.\n", + "\n", + "- The leading jurisprudence on warrantless searches is framed through **constitutional interpretation of Article 31** in conjunction with search and seizure provisions.\n", + "\n", + "- **Key case law:**\n", + "\n", + " 1. *DCI v Geoffrey Ngare & 3 Others [2023] eKLR* (Supreme Court) β€” regarding search and seizure under Article 31: \n", + " - The Court held that **a warrantless search is permissible only if:**\n", + " - There are **exceptional circumstances** that justify the absence of a warrant, \n", + " - The search is **conducted reasonably and proportionately**, \n", + " - The officers had **reasonable suspicion or justification** to believe that the search was necessary to prevent evidence destruction or to preserve public safety. \n", + " - [DCI v Geoffrey Ngare & 3 Others [2023] eKLR, paras 65-72](https://kenyalaw.org/caselaw/cases/view/320512/) (Supreme Court)\n", + "\n", + " 2. *DCI v Republic [2018] eKLR* (High Court) β€” detailed the reasoning that: \n", + " - Warrantless vehicle searches require a **reasonable suspicion** relating to either a traffic offence or criminal activity. \n", + " - The scope of the search must be connected to the suspicion and the offence. \n", + " - The search must be conducted in a manner that respects privacy and dignity. \n", + " - [DCI v Republic [2018] eKLR, paras 45-48](https://kenyalaw.org/caselaw/cases/view/151798/)\n", + "\n", + " 3. *Joseph Kigo Ngigi v Republic [2020] eKLR* β€” emphasized that: \n", + " - Warrantless searches are **exceptions** to a rule; thus, investigative officers must show strict compliance with constitutional safeguards. 
\n", + " - [Joseph Kigo Ngigi v Republic [2020] eKLR, paras 30-35](https://kenyalaw.org/caselaw/cases/view/199937/) (Court of Appeal)\n", + "\n", + "---\n", + "\n", + "## Application\n", + "\n", + "- In the facts, a police officer stopped a vehicle for expired insurance and then conducted a search of the trunk without a warrant finding contraband.\n", + "\n", + "- **Applying the legal test:**\n", + "\n", + " - The stop for expired insurance is a valid traffic-related ground for initial police interaction.\n", + "\n", + " - However, a **search of the trunk is intrusive** and engages Article 31's right to privacy.\n", + "\n", + " - The officer must have had **reasonable suspicion** that the vehicle contained items connected to crime beyond the traffic infringement especially since expired insurance does not by itself justify searching the entire vehicle.\n", + "\n", + " - The search must be **proportionate and justified by law**, for example, if the officer had reasonable grounds that the contraband would be found or be destroyed.\n", + "\n", + " - If no such justification or reasonable suspicion existed, the search may be unconstitutional, and evidence seized thereby potentially inadmissible under the exclusionary rule per Article 31.\n", + "\n", + "---\n", + "\n", + "## Counter-Arguments\n", + "\n", + "- The police may argue the **\"exigent circumstances\" exception** if they reasonably feared that contraband could be removed or destroyed if a warrant was delayed, making the warrantless search justified.\n", + "\n", + "- They may also rely on the doctrine of **implied consent** in traffic stops for prompts searches or **searches necessary for public safety**.\n", + "\n", + "- However, courts require that these exceptions be narrowly applied and that the burden of proof for reasonableness lies on the police.\n", + "\n", + "---\n", + "\n", + "## Conclusion\n", + "\n", + "- Warrantless searches of vehicles during traffic stops in Kenya must meet a strict legal 
threshold as per **Article 31 of the Constitution** and related case law.\n", + "\n", + "- The search must be based on **reasonable suspicion**, justified by law, **proportionate**, and preferably precede by a warrant unless exceptional circumstances exist.\n", + "\n", + "- Searching a vehicle for contraband after stopping for expired insurance without additional grounds could violate the right to privacy and render the evidence inadmissible.\n", + "\n", + "---\n", + "\n", + "## Table of Authorities\n", + "\n", + "| Authority | Citation | Link |\n", + "|-----------|----------|------|\n", + "| Constitution of Kenya (2010), Article 31 | Art. 31, Constitution of Kenya (2010) | [Link](https://kenyalaw.org/kl/fileadmin/pdfdownloads/Constitution_of_Kenya__2010.pdf#page=30) |\n", + "| Criminal Procedure Code (Cap. 75), Section 59 | Search and seizure provisions | [Link](https://kenyalaw.org/kl/fileadmin/pdfdownloads/Acts/CriminalProcedureCode_Cap75No10of1963_Revised2012.pdf) |\n", + "| *DCI v Geoffrey Ngare & 3 Others* [2023] eKLR | Supreme Court, paras 65-72 | [Link](https://kenyalaw.org/caselaw/cases/view/320512/) |\n", + "| *DCI v Republic* [2018] eKLR | High Court, paras 45-48 | [Link](https://kenyalaw.org/caselaw/cases/view/151798/) |\n", + "| *Joseph Kigo Ngigi v Republic* [2020] eKLR | Court of Appeal, paras 30-35 | [Link](https://kenyalaw.org/caselaw/cases/view/199937/) |\n", + "\n", + "---\n", + "\n", + "# Disclaimer \n", + "This research is for educational purposes only and does not constitute legal advice." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "get_legal_research(\n", + " topic=\"Warrantless search of a vehicle after a traffic stop\",\n", + " facts=\"- Police stopped a driver for expired insurance and found contraband in the trunk.\",\n", + " questions=\"1. What is the legal test for warrantless searches in Kenya? 2. 
Which cases interpret Article 31 of the Constitution on privacy?\"\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/week1/community-contributions/week1-assignment-Joshua/day2_ollama_llama32_web_summary.ipynb b/week1/community-contributions/week1-assignment-Joshua/day2_ollama_llama32_web_summary.ipynb new file mode 100644 index 0000000..97e6150 --- /dev/null +++ b/week1/community-contributions/week1-assignment-Joshua/day2_ollama_llama32_web_summary.ipynb @@ -0,0 +1,230 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 1 Day 2 - Webpage Summarizer using Ollama (llama3.2)\n", + "\n", + "This notebook upgrades the Day 1 project to use an open-source local model via Ollama instead of OpenAI.\n", + "\n", + "- Model: `llama3.2` (or `llama3.2:1b` if your machine is slower)\n", + "- Endpoint: `http://localhost:11434/v1` (OpenAI-compatible)\n", + "- No API charges; data stays on your machine\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Client ready. 
Using model: llama3.2\n" + ] + } + ], + "source": [ + "# Setup Ollama OpenAI-compatible client\n", + "import os\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "\n", + "# Optional .env for other configs\n", + "load_dotenv(override=True)\n", + "\n", + "OLLAMA_BASE_URL = \"http://localhost:11434/v1\"\n", + "MODEL = os.getenv(\"OLLAMA_MODEL\", \"llama3.2\") \n", + "\n", + "# Create client pointing to Ollama endpoint (api_key can be any non-empty string)\n", + "ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key=\"ollama\")\n", + "\n", + "print(f\"Client ready. Using model: {MODEL}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Scraper ready.\n" + ] + } + ], + "source": [ + "# Minimal scraper utilities (requests + BeautifulSoup)\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "from urllib.parse import urljoin\n", + "\n", + "HEADERS = {\n", + " \"User-Agent\": (\n", + " \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \"\n", + " \"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + " )\n", + "}\n", + "\n", + "def fetch_website_contents(url, char_limit=2000):\n", + " try:\n", + " r = requests.get(url, headers=HEADERS, timeout=15)\n", + " r.raise_for_status()\n", + " html = r.text\n", + " except Exception as e:\n", + " return f\"Error fetching {url}: {e}\"\n", + "\n", + " soup = BeautifulSoup(html, \"html.parser\")\n", + "\n", + " # Remove scripts/styles\n", + " for el in soup([\"script\", \"style\", \"noscript\", \"template\"]):\n", + " el.decompose()\n", + "\n", + " title = soup.title.get_text(strip=True) if soup.title else \"No title\"\n", + " text = soup.get_text(\"\\n\")\n", + " # Basic whitespace cleanup\n", + " lines = [ln.strip() for ln in text.splitlines() if ln.strip()]\n", + " text = \"\\n\".join(lines)\n", + "\n", + " return (f\"{title}\\n\\n{text}\")[:char_limit]\n", + "\n", + 
"print(\"Scraper ready.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarizer ready.\n" + ] + } + ], + "source": [ + "# Summarization with llama3.2 via Ollama's OpenAI-compatible API\n", + "from IPython.display import Markdown, display\n", + "\n", + "def summarize_url(url, model=MODEL, temperature=0.4, max_tokens=400):\n", + " website = fetch_website_contents(url, char_limit=3000)\n", + "\n", + " system_prompt = (\n", + " \"You are a helpful assistant that analyzes a website's textual content \"\n", + " \"and produces a clear, concise markdown summary with bullet points.\"\n", + " )\n", + "\n", + " user_prompt = f\"\"\"\n", + "Here are the contents of a website.\n", + "Provide a short summary of this website in markdown.\n", + "Include key sections, offerings, and any notable announcements.\n", + "\n", + "{website}\n", + "\"\"\"\n", + "\n", + " resp = ollama.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " ],\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " )\n", + "\n", + " return resp.choices[0].message.content\n", + "\n", + "\n", + "def display_summary(url, **kwargs):\n", + " print(f\"Summarizing with {MODEL} @ Ollama β†’ {url}\")\n", + " md = summarize_url(url, **kwargs)\n", + " display(Markdown(md))\n", + "\n", + "print(\"Summarizer ready.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarizing with llama3.2 @ Ollama β†’ https://the-star.co.ke\n" + ] + }, + { + "data": { + "text/markdown": [ + "**The Star Website Summary**\n", + "=====================================\n", + "\n", + "### Key Sections\n", + "\n", + "* **News**: Latest news articles, including 
updates on Mashujaa Day celebrations, politics, business, health, and more.\n", + "* **Podcasts**: Audio content available for listening or download.\n", + "* **In-pictures**: Galleries of photos related to various topics.\n", + "\n", + "### Offerings\n", + "\n", + "* **Mashujaa Day Coverage**: In-depth coverage of the Kenya's Mashujaa Day celebrations, including news articles, performances, and events.\n", + "* **Raila Odinga Tribute**: Articles and features honoring the late former President Raila Odinga, including his life, legacy, and impact on Kenyan politics.\n", + "\n", + "### Notable Announcements\n", + "\n", + "* **KQ-Qatar Airways Deal**: Partnership to open travel to 19 destinations, effective October 26, 2025.\n", + "* **Raila's State Burial**: Details of the state funeral ceremony held for former President Raila Odinga.\n", + "* **Mashujaa Day Performances**: Special performances honoring Raila Odinga, including music and cultural events.\n", + "\n", + "### Other Key Content\n", + "\n", + "* **Editorials and Op-Eds**: Articles on current events, politics, and social issues from various perspectives.\n", + "* **Infographics**: Visual representations of data and information on topics such as Kenyan leaders who have been accorded state funeral, phrases Raila Odinga loved, and more." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Try it out\n", + "TEST_URL = \"https://the-star.co.ke\" # change to any site you want\n", + "\n", + "display_summary(TEST_URL, temperature=0.4, max_tokens=400)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/week1/community-contributions/week1-assignment-Joshua/day4_tokenization_cost_chunking.ipynb b/week1/community-contributions/week1-assignment-Joshua/day4_tokenization_cost_chunking.ipynb new file mode 100644 index 0000000..01eac7a --- /dev/null +++ b/week1/community-contributions/week1-assignment-Joshua/day4_tokenization_cost_chunking.ipynb @@ -0,0 +1,240 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 1 Day 4 - Tokenization, Cost Estimation, and Chunking (Community Contribution)\n", + "\n", + "This notebook demonstrates:\n", + "- Tokenization using `tiktoken`\n", + "- Token counting per model\n", + "- Simple cost estimation\n", + "- Chunking long text by tokens and by sentences\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setup complete\n" + ] + } + ], + "source": [ + "# Imports and setup\n", + "import tiktoken\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv(override=True)\n", + "openai = OpenAI()\n", + "\n", + "print(\"Setup complete\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "βœ… gpt-4o-mini: o200k_base\n", + "βœ… gpt-4o: o200k_base\n", + "βœ… gpt-3.5-turbo: cl100k_base\n", + "\n", + "Text length: 73 chars\n", + "\n", + "gpt-4o-mini: 20 tokens -> [12194, 922, 1308, 382, 6117, 326, 357, 1299, 9171, 26458, 5148, 13, 1328, 382, 261, 1746, 328, 6602, 2860, 0]\n", + "\n", + "gpt-4o: 20 tokens -> [12194, 922, 1308, 382, 6117, 326, 357, 1299, 9171, 26458, 5148, 13, 1328, 382, 261, 1746, 328, 6602, 2860, 0]\n", + "\n", + "gpt-3.5-turbo: 20 tokens -> [13347, 856, 836, 374, 3279, 323, 358, 1093, 9120, 21869, 4447, 13, 1115, 374, 264, 1296, 315, 4037, 2065, 0]\n" + ] + } + ], + "source": [ + "# Tokenization per model\n", + "models = [\"gpt-4o-mini\", \"gpt-4o\", \"gpt-3.5-turbo\"]\n", + "\n", + "encodings = {}\n", + "for m in models:\n", + " try:\n", + " encodings[m] = tiktoken.encoding_for_model(m)\n", + " print(f\"βœ… {m}: {encodings[m].name}\")\n", + " except Exception as e:\n", + " print(f\"❌ {m}: {e}\")\n", + "\n", + "text = \"Hi my name is Ed and I like banoffee pie. 
This is a test of tokenization!\"\n", + "print(f\"\\nText length: {len(text)} chars\")\n", + "\n", + "for m, enc in encodings.items():\n", + " toks = enc.encode(text)\n", + " print(f\"\\n{m}: {len(toks)} tokens -> {toks}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Text: Hello world!\n", + " gpt-4o-mini: 3 tokens, est input cost $0.000000\n", + " gpt-4o: 3 tokens, est input cost $0.000015\n", + " gpt-3.5-turbo: 3 tokens, est input cost $0.000002\n", + "\n", + "Text: This is a longer text that will have more tokens and cost more money to process.\n", + " gpt-4o-mini: 17 tokens, est input cost $0.000003\n", + " gpt-4o: 17 tokens, est input cost $0.000085\n", + " gpt-3.5-turbo: 17 tokens, est input cost $0.000009\n" + ] + } + ], + "source": [ + "# Token counting and simple cost estimation\n", + "PRICING = {\n", + " \"gpt-4o-mini\": {\"input\": 0.00015, \"output\": 0.0006},\n", + " \"gpt-4o\": {\"input\": 0.005, \"output\": 0.015},\n", + " \"gpt-3.5-turbo\": {\"input\": 0.0005, \"output\": 0.0015},\n", + "}\n", + "\n", + "def count_tokens(text, model=\"gpt-4o-mini\"):\n", + " enc = tiktoken.encoding_for_model(model)\n", + " return len(enc.encode(text))\n", + "\n", + "def estimate_cost(tokens, model=\"gpt-4o-mini\", kind=\"input\"):\n", + " if model not in PRICING:\n", + " return 0.0\n", + " return (tokens / 1000) * PRICING[model][kind]\n", + "\n", + "samples = [\n", + " \"Hello world!\",\n", + " \"This is a longer text that will have more tokens and cost more money to process.\",\n", + "]\n", + "\n", + "for s in samples:\n", + " print(f\"\\nText: {s}\")\n", + " for m in PRICING.keys():\n", + " t = count_tokens(s, m)\n", + " c = estimate_cost(t, m, \"input\")\n", + " print(f\" {m}: {t} tokens, est input cost ${c:.6f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Token-based chunks:\n", + " Chunk 1: 677 chars\n", + " Chunk 2: 690 chars\n", + " Chunk 3: 700 chars\n", + " Chunk 4: 670 chars\n", + " Chunk 5: 688 chars\n", + " Chunk 6: 711 chars\n", + " Chunk 7: 670 chars\n", + " Chunk 8: 238 chars\n", + "\n", + "Sentence-based chunks:\n", + " Chunk 1: 637 chars\n", + " Chunk 2: 698 chars\n", + " Chunk 3: 582 chars\n", + " Chunk 4: 637 chars\n", + " Chunk 5: 698 chars\n", + " Chunk 6: 582 chars\n" + ] + } + ], + "source": [ + "# Chunking helpers\n", + "import re\n", + "\n", + "def chunk_by_tokens(text, model=\"gpt-4o-mini\", max_tokens=300, overlap=30):\n", + " enc = tiktoken.encoding_for_model(model)\n", + " toks = enc.encode(text)\n", + " chunks = []\n", + " start = 0\n", + " while start < len(toks):\n", + " end = min(start + max_tokens, len(toks))\n", + " chunk_text = enc.decode(toks[start:end])\n", + " chunks.append(chunk_text)\n", + " if end == len(toks):\n", + " break\n", + " start = max(0, end - overlap)\n", + " return chunks\n", + "\n", + "def chunk_by_sentences(text, model=\"gpt-4o-mini\", max_tokens=300):\n", + " enc = tiktoken.encoding_for_model(model)\n", + " sentences = re.split(r\"(?<=[.!?])\\s+\", text)\n", + " chunks, current = [], \"\"\n", + " for s in sentences:\n", + " candidate = (current + \" \" + s).strip() if current else s\n", + " if len(enc.encode(candidate)) <= max_tokens:\n", + " current = candidate\n", + " else:\n", + " if current:\n", + " chunks.append(current)\n", + " current = s\n", + " if current:\n", + " chunks.append(current)\n", + " return chunks\n", + "\n", + "# Try with a long text\n", + "long_text = (\n", + " \"Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. \"\n", + " \"It enables machines to perform tasks that typically require human intelligence. \"\n", + " \"Machine learning, a subset of AI, allows systems to learn from data. 
\"\n", + " \"Deep learning uses neural networks with multiple layers. \"\n", + " \"AI powers recommendations, autonomous vehicles, and medical diagnostics. \"\n", + ") * 10\n", + "\n", + "print(\"Token-based chunks:\")\n", + "for i, ch in enumerate(chunk_by_tokens(long_text, max_tokens=120)):\n", + " print(f\" Chunk {i+1}: {len(ch)} chars\")\n", + "\n", + "print(\"\\nSentence-based chunks:\")\n", + "for i, ch in enumerate(chunk_by_sentences(long_text, max_tokens=120)):\n", + " print(f\" Chunk {i+1}: {len(ch)} chars\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/week1/community-contributions/week1-assignment-Joshua/day5_business_brochure_generator.ipynb b/week1/community-contributions/week1-assignment-Joshua/day5_business_brochure_generator.ipynb new file mode 100644 index 0000000..bad841d --- /dev/null +++ b/week1/community-contributions/week1-assignment-Joshua/day5_business_brochure_generator.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 1 Day 5 - Business Brochure Generator (Community Contribution)\n", + "\n", + "This notebook implements a business solution that generates company brochures by:\n", + "- Intelligently selecting relevant links using LLM\n", + "- Aggregating content from multiple pages\n", + "- Generating professional brochures with different styles\n", + "- Supporting both OpenAI and Ollama models\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setup complete!\n" + ] + } + ], + 
"source": [ + "# Setup and imports\n", + "import os\n", + "import json\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "from urllib.parse import urljoin\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "# Initialize OpenAI client\n", + "openai = OpenAI()\n", + "\n", + "# Headers for web scraping\n", + "HEADERS = {\n", + " \"User-Agent\": (\n", + " \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \"\n", + " \"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + " )\n", + "}\n", + "\n", + "print(\"Setup complete!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Web scraping utilities ready!\n" + ] + } + ], + "source": [ + "# Web scraping utilities\n", + "def fetch_website_contents(url, char_limit=2000):\n", + " \"\"\"Fetch and clean website content\"\"\"\n", + " try:\n", + " response = requests.get(url, headers=HEADERS, timeout=10)\n", + " response.raise_for_status()\n", + " html = response.text\n", + " except Exception as e:\n", + " print(f\"Error fetching {url}: {e}\")\n", + " return f\"Error: Could not fetch website content\"\n", + "\n", + " soup = BeautifulSoup(html, \"html.parser\")\n", + " \n", + " # Remove script and style elements\n", + " for script in soup([\"script\", \"style\"]):\n", + " script.decompose()\n", + " \n", + " title = soup.title.get_text(strip=True) if soup.title else \"No title found\"\n", + " text = soup.get_text()\n", + " \n", + " # Clean up whitespace\n", + " lines = (line.strip() for line in text.splitlines())\n", + " chunks = (phrase.strip() for line in lines for phrase in line.split(\" \"))\n", + " text = ' '.join(chunk for chunk in chunks if chunk)\n", + " \n", + " return (f\"{title}\\\\n\\\\n{text}\").strip()[:char_limit]\n", + "\n", + "def 
def fetch_website_links(url):
    """Return a de-duplicated list of absolute URLs linked from `url`.

    Returns an empty list (never raises) when the page cannot be fetched.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        print(f"Error fetching links from {url}: {e}")
        return []

    soup = BeautifulSoup(html, "html.parser")
    links = []

    for a in soup.select("a[href]"):
        href = a.get("href")
        if href:
            # Convert relative URLs to absolute ones rooted at `url`
            if href.startswith(("http://", "https://")):
                links.append(href)
            else:
                links.append(urljoin(url, href))

    return list(set(links))  # Remove duplicates

print("Web scraping utilities ready!")


def select_relevant_links(url, model="gpt-4o-mini"):
    """Use an LLM to pick the links worth including in a company brochure.

    Returns a dict of the form {"links": [{"type": ..., "url": ...}, ...]}.
    On any API or parsing error it returns {"links": []} so callers can
    iterate unconditionally.
    """
    print(f"🔍 Analyzing links for {url}...")

    # Get all links
    links = fetch_website_links(url)
    print(f"Found {len(links)} total links")

    # Create prompt for link selection
    link_system_prompt = """
    You are provided with a list of links found on a webpage.
    You are able to decide which of the links would be most relevant to include in a brochure about the company,
    such as links to an About page, or a Company page, or Careers/Jobs pages.
    You should respond in JSON as in this example:

    {
        "links": [
            {"type": "about page", "url": "https://full.url/goes/here/about"},
            {"type": "careers page", "url": "https://another.full.url/careers"}
        ]
    }
    """

    # BUG FIX: the original placed this note INSIDE the f-string, so the
    # comment text was sent to the model as part of the prompt. Only the
    # first 50 links are included to stay within token limits.
    user_prompt = f"""
    Here is the list of links on the website {url} -
    Please decide which of these are relevant web links for a brochure about the company, 
    respond with the full https URL in JSON format.
    Do not include Terms of Service, Privacy, email links.

    Links (some might be relative links):

    {chr(10).join(links[:50])}
    """

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": link_system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format={"type": "json_object"}
        )
        result = response.choices[0].message.content
        links_data = json.loads(result)
        # Guard against a malformed model reply that lacks the "links" key,
        # which would otherwise raise KeyError downstream.
        links_data.setdefault("links", [])
        print(f"✅ Selected {len(links_data['links'])} relevant links")
        return links_data

    except Exception as e:
        print(f"❌ Error selecting links: {e}")
        return {"links": []}

print("Intelligent link selection ready!")


def fetch_page_and_all_relevant_links(url, model="gpt-4o-mini"):
    """Fetch the landing page plus every LLM-selected relevant page.

    Returns one markdown string combining the landing-page text and each
    linked page's text under its own heading.
    """
    print(f"📄 Fetching content for {url}...")

    # Get main page content
    main_content = fetch_website_contents(url)

    # Get relevant links
    relevant_links = select_relevant_links(url, model)

    # BUG FIX: the original embedded "\\n" (literal backslash-n) in these
    # strings, which appeared verbatim in the prompt and rendered output;
    # real newlines are intended.
    result = f"## Landing Page:\n\n{main_content}\n## Relevant Links:\n"

    for link in relevant_links['links']:
        print(f" 📄 Fetching {link['type']}: {link['url']}")
        try:
            content = fetch_website_contents(link["url"])
            result += f"\n\n### Link: {link['type']}\n"
            result += content
        except Exception as e:
            print(f" ❌ Error fetching {link['url']}: {e}")
            result += f"\n\n### Link: {link['type']} (Error)\n"
            result += f"Error fetching content: {e}"

    return result

print("Content aggregation ready!")


def create_company_brochure(company_name, url, model="gpt-4o-mini", style="professional"):
    """Generate a company brochure in markdown.

    Args:
        company_name: Name used in the prompt and headings.
        url: Company landing page to scrape.
        model: Chat model used for link selection and brochure generation.
        style: "professional" or "humorous"; anything else gets a neutral prompt.

    Returns the brochure markdown, or an error string on API failure.
    """
    print(f"🏢 Creating brochure for {company_name}...")

    # Get all content
    all_content = fetch_page_and_all_relevant_links(url, model)

    # Truncate if too long (to avoid token limits)
    if len(all_content) > 5000:
        # BUG FIX: real newlines instead of literal "\n" characters
        all_content = all_content[:5000] + "\n\n[Content truncated...]"

    # Define brochure system prompt based on style
    if style == "professional":
        brochure_system_prompt = """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """
    elif style == "humorous":
        brochure_system_prompt = """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
        Respond in markdown without code blocks.
        Include details of company culture, customers and careers/jobs if you have the information.
        """
    else:
        brochure_system_prompt = """
        You are an assistant that analyzes the contents of several relevant pages from a company website
        and creates a short brochure about the company.
        Respond in markdown without code blocks.
        """

    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.

    {all_content}
    """

    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": brochure_system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=1000
        )
        brochure = response.choices[0].message.content
        print(f"✅ Brochure generated successfully!")
        return brochure

    except Exception as e:
        print(f"❌ Error generating brochure: {e}")
        return f"Error generating brochure: {e}"


def display_brochure(company_name, url, model="gpt-4o-mini", style="professional"):
    """Generate and render a company brochure as markdown in the notebook."""
    brochure = create_company_brochure(company_name, url, model, style)
    # BUG FIX: real newlines here — the original's literal "\n\n" showed up
    # verbatim in the rendered heading (see the saved cell output).
    display(Markdown(f"# {company_name} Brochure\n\n{brochure}"))

print("Professional brochure generation ready!")
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing brochure generation for Radio Africa Group...\n", + "🏒 Creating brochure for Radio Africa Group...\n", + "πŸ“„ Fetching content for https://radioafricagroup.co.ke/...\n", + "πŸ” Analyzing links for https://radioafricagroup.co.ke/...\n", + "Found 34 total links\n", + "βœ… Selected 5 relevant links\n", + " πŸ“„ Fetching about page: https://staging.radioafrica.digital/about-us/\n", + " πŸ“„ Fetching case studies page: https://radioafricagroup.co.ke/case-studies\n", + " πŸ“„ Fetching contact page: https://radioafricagroup.co.ke/contact\n", + " πŸ“„ Fetching careers page: https://staging.radioafrica.digital/careers\n", + "Error fetching https://staging.radioafrica.digital/careers: 404 Client Error: Not Found for url: https://staging.radioafrica.digital/careers\n", + " πŸ“„ Fetching services page: https://radioafricagroup.co.ke/services.html\n", + "Error fetching https://radioafricagroup.co.ke/services.html: 404 Client Error: Not Found for url: https://radioafricagroup.co.ke/services.html\n", + "βœ… Brochure generated successfully!\n" + ] + }, + { + "data": { + "text/markdown": [ + "# Radio Africa Group Brochure\\n\\n# Radio Africa Group Brochure\n", + "\n", + "## About Us\n", + "Radio Africa Group (RAG) is a leading media company based in Kenya, renowned for its diverse range of platforms that include six national radio stations, one television station, and a national newspaper. Our flagship brands such as Kiss FM, Classic 105, Radio Jambo, The Star newspaper, and Kiss TV reach millions of Kenyans daily, making us a cornerstone of the country's media landscape.\n", + "\n", + "At RAG, we pride ourselves on being at the forefront of Africa's marketing and communication industry. 
We are dedicated to innovation, creativity, and collaboration, striving to shape better futures through impactful storytelling and entertainment.\n", + "\n", + "## Our Mission\n", + "We aim to amplify Kenyan voices, champion local talent, and deliver meaningful journalism that connects citizens to national conversations. With a focus on digital transformation and strategic partnerships, we are committed to leading the evolution of modern African media.\n", + "\n", + "## Company Culture\n", + "At Radio Africa Group, our culture is built on creativity, collaboration, and a shared passion for media. We celebrate our employees' milestones, as seen in our recent surprise birthday celebration for CEO Martin Khafafa, fostering a family-like environment that values each individual's contribution. We believe in pushing boundaries and nurturing talent, creating a dynamic workplace where innovation thrives.\n", + "\n", + "## Our Audience\n", + "Our diverse clientele includes listeners and viewers across Kenya, with a special focus on engaging younger audiences through music, talk shows, podcasts, and live streaming. We aim to build meaningful connections between brands and their target audiences, utilizing our deep insights and cutting-edge technology.\n", + "\n", + "## Careers at RAG\n", + "We are always on the lookout for talented individuals who share our passion for media and innovation. Joining Radio Africa Group means becoming part of a vibrant team that values creativity, growth, and professional development. 
# Demo: generate a professional brochure for a sample company
COMPANY_NAME = "Radio Africa Group"
COMPANY_URL = "https://radioafricagroup.co.ke/"

print("Testing brochure generation for " + COMPANY_NAME + "...")

display_brochure(COMPANY_NAME, COMPANY_URL, style="professional")
# --- Imports and setup ----------------------------------------------------
# Standard library first, then third-party packages.
import json
import os

import ollama
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

# Pull OPENAI_API_KEY (and friends) from .env
load_dotenv(override=True)

# OpenAI client for the hosted model
openai = OpenAI()

# Model identifiers used throughout this notebook
MODEL_GPT = "gpt-4o-mini"
MODEL_LLAMA = "llama3.2"

print("Setup complete! Ready to answer technical questions.")

# --- The technical question under discussion ------------------------------
# Edit this string and re-run the cells below to ask something else.
question = """
Please explain what this code does and why:
yield from {book.get("author") for book in books if book.get("author")}
"""

print("Question to analyze:")
print(question)
def get_gpt_response(question):
    """Stream an answer from GPT-4o-mini, live-updating a markdown display.

    Returns the full accumulated response text.
    """
    print("🤖 Getting response from GPT-4o-mini...")

    stream = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=[
            {"role": "system", "content": "You are a helpful programming tutor. Explain code clearly and concisely."},
            {"role": "user", "content": question}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)

    for chunk in stream:
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            # BUG FIX: the original used "\\n" (literal backslash-n), which
            # rendered verbatim in the markdown heading — see the saved
            # output. Real newlines are intended.
            update_display(Markdown(f"## GPT-4o-mini Response:\n\n{response}"), display_id=display_handle.display_id)

    return response

# Get GPT response
gpt_response = get_gpt_response(question)


def get_ollama_response(question):
    """Ask the local Ollama Llama 3.2 model and render its answer as markdown.

    Returns the response text, or an error message string if Ollama is
    unreachable (so the comparison cell below still runs).
    """
    print("🦙 Getting response from Ollama Llama 3.2...")

    try:
        response = ollama.chat(
            model=MODEL_LLAMA,
            messages=[
                {"role": "system", "content": "You are a helpful programming tutor. Explain code clearly and concisely."},
                {"role": "user", "content": question}
            ]
        )

        llama_response = response['message']['content']
        # BUG FIX: real newlines instead of literal "\n" characters
        display(Markdown(f"## Llama 3.2 Response:\n\n{llama_response}"))
        return llama_response

    except Exception as e:
        error_msg = f"Error with Ollama: {e}"
        print(error_msg)
        display(Markdown(f"## Llama 3.2 Response:\n\n{error_msg}"))
        return error_msg

# Get Ollama response
llama_response = get_ollama_response(question)


def compare_responses(gpt_response, llama_response):
    """Render a short side-by-side comparison of the two model answers.

    NOTE: the length figures are computed from the actual responses, but the
    "Key Differences" bullets are canned text, not derived from the content.
    """
    print("📊 Comparing responses...")

    comparison = f"""
## Response Comparison

### GPT-4o-mini Response Length: {len(gpt_response)} characters
### Llama 3.2 Response Length: {len(llama_response)} characters

### Key Differences:
- **GPT-4o-mini**: More detailed and structured explanation
- **Llama 3.2**: More concise and direct approach

Both models successfully explained the code, but with different styles and levels of detail.
"""

    display(Markdown(comparison))

# Compare the responses
compare_responses(gpt_response, llama_response)


# Week 1 Learnings Summary
summary = """
## Week 1 Learnings Demonstrated

### ✅ Day 1 - Web Scraping & API Integration
- **BeautifulSoup** for HTML parsing
- **Requests** for HTTP calls
- **OpenAI API** integration
- **SSL certificate** handling for Windows

### ✅ Day 2 - Chat Completions API & Ollama
- **Chat Completions API** understanding
- **OpenAI-compatible endpoints** (Ollama)
- **Model comparison** techniques
- **Streaming responses** implementation

### ✅ Day 4 - Tokenization & Cost Management
- **tiktoken** for token counting
- **Cost estimation** strategies
- **Text chunking** techniques
- **Token-aware** processing

### ✅ Day 5 - Business Solutions
- **Intelligent link selection** using LLM
- **Multi-page content** aggregation
- **Professional brochure** generation
- **Error handling** and robustness

### ✅ Week 1 Exercise - Technical Question Answerer
- **Streaming responses** from OpenAI
- **Local inference** with Ollama
- **Side-by-side comparison** of models
- **Error handling** for both APIs

## Key Skills Acquired:
1. **API Integration** - OpenAI, Ollama, web scraping
2. **Model Comparison** - Understanding different LLM capabilities
3. **Streaming** - Real-time response display
4. **Error Handling** - Robust application design
5. **Business Applications** - Practical LLM implementations
"""

display(Markdown(summary))