diff --git a/.gitignore b/.gitignore
index a47cefc..8a77daf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -189,3 +189,6 @@ nohup.out
*.png
scraper_cache/
+
+# WandB local sync data.
+wandb/
diff --git a/community-contributions/Keshvi_Web2Quiz/Web2Quiz.ipynb b/community-contributions/Keshvi_Web2Quiz/Web2Quiz.ipynb
new file mode 100644
index 0000000..1609d81
--- /dev/null
+++ b/community-contributions/Keshvi_Web2Quiz/Web2Quiz.ipynb
@@ -0,0 +1,300 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Web2Quiz: Generator Quiz from webpage content."
+ ],
+ "metadata": {
+ "id": "n3vd295elWxh"
+ },
+ "id": "n3vd295elWxh"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
+ "metadata": {
+ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988"
+ },
+ "outputs": [],
+ "source": [
+ "#.env upload\n",
+ "from google.colab import files\n",
+ "uploaded = files.upload()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install dotenv\n"
+ ],
+ "metadata": {
+ "id": "VTpN_jVbMKuk"
+ },
+ "id": "VTpN_jVbMKuk",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv"
+ ],
+ "metadata": {
+ "id": "twYi9eJwL2h1"
+ },
+ "id": "twYi9eJwL2h1",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENROUTER_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "# elif not api_key.startswith(\"sk-proj-\"):\n",
+ "# print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ],
+ "metadata": {
+ "id": "NRnUTEkZL2eZ"
+ },
+ "id": "NRnUTEkZL2eZ",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install openai"
+ ],
+ "metadata": {
+ "id": "RRuKJ_pzL2be"
+ },
+ "id": "RRuKJ_pzL2be",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install requests beautifulsoup4\n",
+ "!pip install selenium"
+ ],
+ "metadata": {
+ "id": "DWsPpdjOVPTW"
+ },
+ "id": "DWsPpdjOVPTW",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from bs4 import BeautifulSoup\n",
+ "import requests\n",
+ "from tempfile import mkdtemp\n",
+ "from selenium import webdriver\n",
+ "from selenium.webdriver.chrome.options import Options\n",
+ "from selenium.webdriver.support.ui import WebDriverWait\n",
+ "from selenium.webdriver.support import expected_conditions as EC\n",
+ "from selenium.webdriver.common.by import By\n",
+ "\n",
+ "class Website:\n",
+ " def __init__(self, url, use_selenium=False):\n",
+ " \"\"\"\n",
+ " Create Website object from the given URL.\n",
+ " If use_selenium=True, fetch page with Selenium.\n",
+ " Otherwise, use requests + BeautifulSoup.\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " self.title = \"\"\n",
+ " self.text = \"\"\n",
+ " self.use_selenium = use_selenium\n",
+ "\n",
+ " if self.use_selenium:\n",
+ " html = self._fetch_with_selenium()\n",
+ " else:\n",
+ " html = self._fetch_with_requests()\n",
+ "\n",
+ " if not html:\n",
+ " self.title = \"Error fetching page\"\n",
+ " self.text = \"Could not retrieve HTML content.\"\n",
+ " return\n",
+ "\n",
+ " soup = BeautifulSoup(html, \"html.parser\")\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ "\n",
+ " # content_div = soup.find('div', id='content')\n",
+ " if soup.body:\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\", \"header\", \"footer\", \"nav\", \"aside\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+ " else:\n",
+ " self.text = \"No body tag found in the HTML.\"\n",
+ "\n",
+ " # Basic html scrapper\n",
+ " def _fetch_with_requests(self):\n",
+ " \"\"\"Fetch HTML using requests.\"\"\"\n",
+ " try:\n",
+ " headers = {\"User-Agent\": \"Mozilla/5.0\"}\n",
+ " response = requests.get(self.url, headers=headers, timeout=10)\n",
+ " response.raise_for_status()\n",
+ " return response.text\n",
+ " except requests.exceptions.RequestException as e:\n",
+ " print(f\"Error fetching with requests: {e}\")\n",
+ " return None\n",
+ "\n",
+ " # Dynamic html scrapper\n",
+ " def _fetch_with_selenium(self):\n",
+ " \"\"\"Fetch HTML using Selenium with improved options.\"\"\"\n",
+ " options = Options()\n",
+ " options.add_argument(\"--no-sandbox\")\n",
+ " options.add_argument(\"--disable-dev-shm-usage\")\n",
+ " options.add_argument(\"--headless\")\n",
+ " options.add_argument(f\"--user-data-dir={mkdtemp()}\")\n",
+ "\n",
+ " driver = None\n",
+ " try:\n",
+ " driver = webdriver.Chrome(options=options)\n",
+ " driver.get(self.url)\n",
+ "\n",
+ " WebDriverWait(driver, 10).until(\n",
+ " EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
+ " )\n",
+ "\n",
+ " html = driver.page_source\n",
+ " return html\n",
+ " except Exception as e:\n",
+ " print(f\"An error occurred during Selenium fetch: {e}\")\n",
+ " return None\n",
+ " finally:\n",
+ " if driver:\n",
+ " driver.quit()\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "PzBP0tXXcrP-"
+ },
+ "id": "PzBP0tXXcrP-",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "site1 = Website(\"https://en.wikipedia.org/wiki/Integration_testing\", use_selenium=False)\n",
+ "print(\"Title:\", site1.title)\n",
+ "print(\"Text preview:\", site1.text[:200])\n",
+ "\n",
+ "site2 = Website(\"https://www.tpointtech.com/java-for-loop\", use_selenium=True)\n",
+ "print(\"Title:\", site2.title)\n",
+ "print(\"Text preview:\", site2.text[:200])"
+ ],
+ "metadata": {
+ "id": "vsNmh5b5c6Gq"
+ },
+ "id": "vsNmh5b5c6Gq",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "system_prompt = f\"You are a MCQ quiz generator. Analyze the provided TEXT and filter CONTENT relevent to {site1.title}. Then based on the relevant CONTENT generate 10 MCQs. List all correct options at the end.\"\n",
+ "user_prompt = f\"Below is provided TEXT : \\n{site1.text}\"\n",
+ "\n",
+ "# Step 2: Make the messages list\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ "]\n",
+ "\n",
+ "# Step 3: Call OpenAI\n",
+ "openai = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=api_key)\n",
+ "\n",
+ "# Step 4: print the result\n",
+ "response = openai.chat.completions.create(model=\"qwen/qwen2.5-vl-72b-instruct:free\", messages=messages)\n",
+ "print(response.choices[0].message.content)"
+ ],
+ "metadata": {
+ "collapsed": true,
+ "id": "BYdc1w70QFD2"
+ },
+ "id": "BYdc1w70QFD2",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "system_prompt = f\"You are a MCQ quiz generator. Analyze the provided TEXT and filter CONTENT relevent to {site2.title}. Then based on the relevant CONTENT generate 10 MCQs. List all correct options at the end.\"\n",
+ "user_prompt = f\"Below is provided TEXT : \\n{site2.text}\"\n",
+ "\n",
+ "# Step 2: Make the messages list\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ "]\n",
+ "\n",
+ "# Step 3: Call OpenAI\n",
+ "openai = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=api_key)\n",
+ "\n",
+ "# Step 4: print the result\n",
+ "response = openai.chat.completions.create(model=\"qwen/qwen2.5-vl-72b-instruct:free\", messages=messages)\n",
+ "print(response.choices[0].message.content)"
+ ],
+ "metadata": {
+ "id": "Rv8vxFHtQFBm"
+ },
+ "id": "Rv8vxFHtQFBm",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "o5tIkQ95_2Hc"
+ },
+ "id": "o5tIkQ95_2Hc",
+ "execution_count": null,
+ "outputs": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.12"
+ },
+ "colab": {
+ "provenance": []
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/community-contributions/Market_Research_Agent.ipynb b/community-contributions/Market_Research_Agent.ipynb
new file mode 100644
index 0000000..52dfdf4
--- /dev/null
+++ b/community-contributions/Market_Research_Agent.ipynb
@@ -0,0 +1,650 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
+ "metadata": {},
+ "source": [
+ "# YOUR FIRST LAB\n",
+ "### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
+ "\n",
+ "## Your first Frontier LLM Project\n",
+ "\n",
+ "Let's build a useful LLM solution - in a matter of minutes.\n",
+ "\n",
+ "By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
+ "\n",
+ "Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
+ "\n",
+ "Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
+ "\n",
+ "## If you're new to Jupyter Lab\n",
+ "\n",
+ "Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
+ "\n",
+ "I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
+ "\n",
+ "## If you're new to the Command Line\n",
+ "\n",
+ "Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
+ "\n",
+ "## If you'd prefer to work in IDEs\n",
+ "\n",
+ "If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
+ "If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
+ "\n",
+ "## If you'd like to brush up your Python\n",
+ "\n",
+ "I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
+ "`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
+ "\n",
+ "## I am here to help\n",
+ "\n",
+ "If you have any problems at all, please do reach out. \n",
+ "I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
+ "And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done ๐ \n",
+ "\n",
+ "## More troubleshooting\n",
+ "\n",
+ "Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
+ "\n",
+ "## For foundational technical knowledge (eg Git, APIs, debugging) \n",
+ "\n",
+ "If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. ๐ I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
+ "\n",
+ "This covers Git and GitHub; what they are, the difference, and how to use them: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
+ "\n",
+ "This covers technical foundations: \n",
+ "ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
+ "\n",
+ "This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
+ "\n",
+ "This covers the essential techniques for figuring out errors: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
+ "\n",
+ "And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
+ "\n",
+ "## If this is old hat!\n",
+ "\n",
+ "If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Please read - important note\n",
+ " The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, after watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " This code is a live resource - keep an eye out for my emails\n",
+ " I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.
\n",
+ " I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Business value of these exercises\n",
+ " A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.\n",
+ " | \n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# If you get an error running this cell, then please head over to the troubleshooting notebook!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
+ "metadata": {},
+ "source": [
+ "# Connecting to OpenAI (or Ollama)\n",
+ "\n",
+ "The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
+ "\n",
+ "If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
+ "\n",
+ "## Troubleshooting if you have problems:\n",
+ "\n",
+ "Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
+ "\n",
+ "If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
+ "\n",
+ "Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
+ "\n",
+ "Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
+ "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
+ "metadata": {},
+ "source": [
+ "# Let's make a quick call to a Frontier model to get started, as a preview!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
+ "\n",
+ "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2aa190e5-cb31-456a-96cc-db109919cd78",
+ "metadata": {},
+ "source": [
+ "## OK onwards with our first project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5e793b2-6775-426a-a139-4848291d0463",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's try one out. Change the website and add print statements to follow along.\n",
+ "\n",
+ "ed = Website(\"https://edwarddonner.com\")\n",
+ "print(ed.title)\n",
+ "print(ed.text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "509a1ee7-de00-4c83-8dd8-017dcc638850",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rudra=Website(\"https://github.com/RudraDudhat2509/\")\n",
+ "print(rudra.title)\n",
+ "print(rudra.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
+ "metadata": {},
+ "source": [
+ "## Types of prompts\n",
+ "\n",
+ "You may know this already - but if not, you will get very familiar with it!\n",
+ "\n",
+ "Models like GPT4o have been trained to receive instructions in a particular way.\n",
+ "\n",
+ "They expect to receive:\n",
+ "\n",
+ "**A system prompt** that tells them what task they are performing and what tone they should use\n",
+ "\n",
+ "**A user prompt** -- the conversation starter that they should reply to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
+ "\n",
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown. Always use Points and simple english. Never use hyphens. Stick to the point\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(user_prompt_for(ed))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
+ "metadata": {},
+ "source": [
+ "## Messages\n",
+ "\n",
+ "The API from OpenAI expects to receive messages in a particular structure.\n",
+ "Many of the other APIs share this structure:\n",
+ "\n",
+ "```python\n",
+ "[\n",
+ " {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
+ " {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
+ "]\n",
+ "```\n",
+ "To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
+ " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To give you a preview -- calling OpenAI with system and user messages:\n",
+ "\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
+ "metadata": {},
+ "source": [
+ "## And now let's build useful messages for GPT-4o-mini, using a function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# See how this function creates exactly the format above\n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Try this out, and then try for a few more websites\n",
+ "\n",
+ "messages_for(ed)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
+ "metadata": {},
+ "source": [
+ "## Time to bring it together - the API for OpenAI is very simple!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And now: call the OpenAI API. You will get very familiar with this!\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "summarize(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d926d59-450e-4609-92ba-2d6f244f1342",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function to display this nicely in the Jupyter output, using markdown\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3018853a-445f-41ff-9560-d925d1774b2f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
+ "metadata": {},
+ "source": [
+ "# Let's try more websites\n",
+ "\n",
+ "Note that this will only work on websites that can be scraped using this simplistic approach.\n",
+ "\n",
+ "Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
+ "\n",
+ "Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
+ "\n",
+ "But many websites will work just fine!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45d83403-a24c-44b5-84ac-961449b4008f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://cnn.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75e9fd40-b354-4341-991e-863ef2e59db7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://github.com/RudraDudhat2509\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Business applications\n",
+ " In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
+ "\n",
+ "More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.\n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Before you continue - now try yourself\n",
+ " Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.\n",
+ " | \n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "\n",
+ "system_prompt = \"\"\"You are to act like a Mckinsey Consultant specializing in market research. \n",
+ "1) You are to follow legal guidelines and never give immoral advice. \n",
+ "2) Your job is to maximise profits for your clients by analysing their companies initiatives and giving out recommendations for newer initiatives.\\n \n",
+ "3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n",
+ "4) If possible try to see what competitors exist and what market gap can your clients company exploit.\n",
+ "5) Further more, USe SWOT, Porters 5 forces to summarize your recommendations, Give confidence score with every recommendations\n",
+ "6) Try to give unique solutions by seeing what the market gap is, if market gap is ambiguious skip this step\n",
+ "7) add an estimate of what rate the revenue of the comapany will increase at provided they follow the guidelines, give conservating estimates keeping in account non ideal conditions.\n",
+ "8) if the website isnt of a company or data isnt available, give out an error message along the lines of more data required for analysis\"\"\"\n",
+ "\n",
+ "def makereq(url):\n",
+ " website=Website(url)\n",
+ " user_prompt=f\"This is my companies website: {website.title}. Could you help me increase profits by giving me recommendations on what i should do. here is the content of my website:\\n\"\n",
+ " user_prompt+=website.text;\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ "def recommend(url):\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = makereq(url))\n",
+ " display(Markdown(response.choices[0].message.content))\n",
+ " \n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "recommend(\"https://www.swiggy.com/corporate/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db1be9b9-b32e-4e8d-83df-0b6f822ac7b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "recommend(\"https://playvalorant.com/en-us/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d9089b4a-67ee-456e-b35d-ca00c2f9f73a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "recommend(\"https://nexora-labs.com/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e042d74-456a-4ec4-bdb8-4b08603b5e66",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "recommend(\"https://github.com/RudraDudhat2509/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29187b86-1e35-41bc-bb54-60b3d804b96e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/community-contributions/SyntheticDataGenerator_PT.ipynb b/community-contributions/SyntheticDataGenerator_PT.ipynb
new file mode 100644
index 0000000..18cf4c6
--- /dev/null
+++ b/community-contributions/SyntheticDataGenerator_PT.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d08b387c-53fb-46d2-b083-5eebc3c97e1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n",
+ "!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4f1851b2-890c-427b-8e70-b998efa04c67",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from openai import OpenAI\n",
+ "from google.colab import drive\n",
+ "from huggingface_hub import login\n",
+ "from google.colab import userdata\n",
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
+ "import torch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2d334b5-453e-4213-8e1c-905d504d2dc1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c1b3684c-c170-45f2-a83d-7e6e2ca1e23b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hf_token = userdata.get('HF_TOKEN')\n",
+ "login(hf_token, add_to_git_credential=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c1b6dae-3213-4d68-8fa1-d195704790dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai_api_key = userdata.get('OPENAI_API_KEY')\n",
+ "openai = OpenAI(api_key=openai_api_key)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "988974c7-814c-478a-be7b-0928b0efdbab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"You are an assistant that produces synthetic test data. The fields, data type of the field like numeric, date, alphanumeric etc., will be provided. Generate data considering all cases, if it is a workflow audit data then consider all touchpoint movements. Do not provide a python script to generate the data. Provide the data as a json with arrays.\"\n",
+ "user_prompt = \"\"\"Create a synthetic dataset for testing. \n",
+ "Column names and type - \n",
+ "ID: 10 digit number\n",
+ "TRACKING_ID: 13 character alphanumeric\n",
+ "CASE REPORT DATE : DD-MMM-YYYY HH:MM:SS\n",
+ "NOTIFICATION DATE : DD-MMM-YYYY HH:MM:SS\n",
+ "IN SCOPE : (Yes/No)\n",
+ "\"\"\"\n",
+ "\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40cebc04-abf0-4c61-8b18-f98d3c1fe680",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "quant_config = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ " bnb_4bit_quant_type=\"nf4\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "710ba1af-8e12-4635-933b-00df8d2e3f9d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
+ "tokenizer.pad_token = tokenizer.eos_token\n",
+ "inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
+ "streamer = TextStreamer(tokenizer)\n",
+ "model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
+ "outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/community-contributions/day2ollamatest.ipynb b/community-contributions/day2ollamatest.ipynb
new file mode 100644
index 0000000..f50b35d
--- /dev/null
+++ b/community-contributions/day2ollamatest.ipynb
@@ -0,0 +1,97 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4d02ac4b-9cab-42bb-b8a3-123d53913471",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "import ollama\n",
+ "\n",
+ "MODEL = \"llama3.2\"\n",
+ "\n",
+ "# Optional headers to avoid request blocks\n",
+ "HEADERS = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64)\"\n",
+ "}\n",
+ "\n",
+ "\n",
+ "class Website:\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=HEADERS)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " if soup.body:\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+ " else:\n",
+ " self.text = \"\"\n",
+ "\n",
+ "\n",
+ "system_prompt = \"\"\"You are an assistant that analyzes the contents of a website \n",
+ "and provides a short summary, ignoring navigation text. Respond in markdown.\"\"\"\n",
+ "\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " return f\"\"\"You are looking at a website titled {website.title}.\n",
+ "The contents of this website are as follows. Please provide a short summary in markdown. \n",
+ "If it includes news or announcements, summarize these too.\n",
+ "\n",
+ "{website.text}\n",
+ "\"\"\"\n",
+ "\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = ollama.chat(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]\n",
+ " )\n",
+ " return response['message']['content']\n",
+ "\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))\n",
+ "\n",
+ "\n",
+ "# Example usage\n",
+ "display_summary(\"https://edwarddonner.com\")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/community-contributions/fitness-nutrition-planner-agent/README.md b/community-contributions/fitness-nutrition-planner-agent/README.md
new file mode 100644
index 0000000..a447792
--- /dev/null
+++ b/community-contributions/fitness-nutrition-planner-agent/README.md
@@ -0,0 +1,101 @@
+
+# Fitness & Nutrition Planner Agent (Community Contribution)
+
+A tool-using agent that generates a **7-day vegetarian-friendly meal plan** with **calorie/macro targets** and a **consolidated grocery list**. It supports **targeted swaps** (e.g., "swap Tuesday lunch") while honoring dietary patterns, allergies, and dislikes.
+
+> **Disclaimer**: This project is for educational purposes and is **not** medical advice. Consult a licensed professional for medical or specialized dietary needs.
+
+---
+
+## โจ Features
+- Calculates **TDEE** and **macro targets** via Mifflin-St Jeor + activity factors.
+- Builds a **7-day plan** (breakfast/lunch/dinner) respecting dietary constraints.
+- Produces an aggregated **grocery list** for the week.
+- Supports **swap** of any single meal while keeping macros reasonable.
+- Minimal **Streamlit UI** for demos.
+- Extensible **tool-based architecture** to plug real recipe APIs/DBs.
+
+---
+
+## ๐งฑ Architecture
+- **Agent core**: OpenAI function-calling (tools) with a simple orchestration loop.
+- **Tools**:
+ 1. `calc_calories_and_macros` — computes targets.
+ 2. `compose_meal_plan` — creates the 7-day plan.
+ 3. `grocery_list_from_plan` — consolidates ingredients/quantities.
+ 4. `swap_meal` — replaces one meal (by kcal proximity and constraints).
+- **Recipe source**: a tiny in-memory recipe DB for demo; replace with a real API or your own dataset.
+
+---
+
+## ๐ Quickstart
+
+### 1) Install
+```bash
+pip install openai streamlit pydantic python-dotenv
+```
+
+### 2) Configure
+Create a `.env` file in this folder:
+```
+OPENAI_API_KEY=your_key_here
+OPENAI_MODEL=gpt-4o-mini
+```
+
+### 3) Run CLI (example)
+```bash
+python agent.py
+```
+
+### 4) Run UI
+```bash
+streamlit run app.py
+```
+
+---
+
+## ๐งช Sample Profile (from issue author)
+See `sample_profile.json` for the exact values used to produce `demo_output.md`.
+- **Sex**: female
+- **Age**: 45
+- **Height**: 152 cm (~5 ft)
+- **Weight**: 62 kg
+- **Activity**: light
+- **Goal**: maintain
+- **Diet**: vegetarian
+
+---
+
+## ๐ง Extend
+- Replace the in-memory recipes with:
+ - A real **recipe API** (e.g., Spoonacular) or
+ - Your **own dataset** (CSV/DB) + filters/tags
+- Add price lookups to produce a **budget-aware** grocery list.
+- Add **adherence tracking** and charts.
+- Integrate **wearables** or daily steps to refine TDEE dynamically.
+- Add **snacks** for days slightly under target kcals.
+
+---
+
+## ๐ก๏ธ Safety Notes
+- The agent warns for extreme deficits but does **not** diagnose conditions.
+- For calorie targets below commonly recommended minimums (e.g., ~1200 kcal/day for many adults), advise consulting a professional.
+
+---
+
+## ๐ Project Layout
+```
+fitness-nutrition-planner-agent/
+โโ README.md
+โโ agent.py
+โโ app.py
+โโ sample_profile.json
+โโ demo_output.md
+```
+
+---
+
+## ๐ค How to contribute
+- Keep notebooks (if any) with **cleared outputs**.
+- Follow the course repo's contribution guidelines.
+- Include screenshots or a short Loom/YT demo link in your PR description.
diff --git a/community-contributions/fitness-nutrition-planner-agent/agent.py b/community-contributions/fitness-nutrition-planner-agent/agent.py
new file mode 100644
index 0000000..75bcd10
--- /dev/null
+++ b/community-contributions/fitness-nutrition-planner-agent/agent.py
@@ -0,0 +1,411 @@
+
+# agent.py
+import os, math, json, copy
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional, Tuple
+from pydantic import BaseModel, Field, ValidationError
+from dotenv import load_dotenv
+from openai import OpenAI
+
+load_dotenv()
+
+# ------------------------------
+# Data models
+# ------------------------------
+class UserProfile(BaseModel):
+ sex: str = Field(..., description="male or female")
+ age: int
+ height_cm: float
+ weight_kg: float
+ activity_level: str = Field(..., description="sedentary, light, moderate, active, very_active")
+ goal: str = Field(..., description="lose, maintain, gain")
+ dietary_pattern: Optional[str] = Field(None, description="e.g., vegetarian, vegan, halal, kosher")
+ allergies: List[str] = Field(default_factory=list)
+ dislikes: List[str] = Field(default_factory=list)
+ daily_meals: int = 3
+ cuisine_prefs: List[str] = Field(default_factory=list)
+ time_per_meal_minutes: int = 30
+ budget_level: Optional[str] = Field(None, description="low, medium, high")
+
+class MacroTargets(BaseModel):
+    # NOTE: compose_meal_plan/swap_meal also reuse this model to carry a day's
+    # *actual* totals, storing the summed kcal in `target_kcal`.
+    tdee: int          # estimated total daily energy expenditure (kcal)
+    target_kcal: int   # daily calorie target after goal adjustment
+    protein_g: int
+    carbs_g: int
+    fat_g: int
+
+class Meal(BaseModel):
+ name: str
+ ingredients: List[Dict[str, Any]] # {item, qty, unit}
+ kcal: int
+ protein_g: int
+ carbs_g: int
+ fat_g: int
+ tags: List[str] = Field(default_factory=list)
+ instructions: Optional[str] = None
+
+class DayPlan(BaseModel):
+ day: str
+ meals: List[Meal]
+ totals: MacroTargets
+
+class WeekPlan(BaseModel):
+ days: List[DayPlan]
+ meta: Dict[str, Any]
+
+# ------------------------------
+# Tiny in-memory recipe โDBโ
+# (extend/replace with a real source)
+# ------------------------------
+RECIPE_DB: List[Meal] = [
+ Meal(
+ name="Greek Yogurt Parfait",
+ ingredients=[{"item":"nonfat greek yogurt","qty":200,"unit":"g"},
+ {"item":"berries","qty":150,"unit":"g"},
+ {"item":"granola","qty":30,"unit":"g"},
+ {"item":"honey","qty":10,"unit":"g"}],
+ kcal=380, protein_g=30, carbs_g=52, fat_g=8,
+ tags=["vegetarian","breakfast","5-min","no-cook"]
+ ),
+ Meal(
+ name="Tofu Veggie Stir-Fry with Rice",
+ ingredients=[{"item":"firm tofu","qty":150,"unit":"g"},
+ {"item":"mixed vegetables","qty":200,"unit":"g"},
+ {"item":"soy sauce (low sodium)","qty":15,"unit":"ml"},
+ {"item":"olive oil","qty":10,"unit":"ml"},
+ {"item":"brown rice (cooked)","qty":200,"unit":"g"}],
+ kcal=650, protein_g=28, carbs_g=85, fat_g=20,
+ tags=["vegan","gluten-free","dinner","20-min","stovetop","soy"]
+ ),
+ Meal(
+ name="Chicken Quinoa Bowl",
+ ingredients=[{"item":"chicken breast","qty":140,"unit":"g"},
+ {"item":"quinoa (cooked)","qty":185,"unit":"g"},
+ {"item":"spinach","qty":60,"unit":"g"},
+ {"item":"olive oil","qty":10,"unit":"ml"},
+ {"item":"lemon","qty":0.5,"unit":"unit"}],
+ kcal=620, protein_g=45, carbs_g=55, fat_g=20,
+ tags=["gluten-free","dinner","25-min","high-protein","poultry"]
+ ),
+ Meal(
+ name="Lentil Soup + Wholegrain Bread",
+ ingredients=[{"item":"lentils (cooked)","qty":200,"unit":"g"},
+ {"item":"vegetable broth","qty":400,"unit":"ml"},
+ {"item":"carrot","qty":80,"unit":"g"},
+ {"item":"celery","qty":60,"unit":"g"},
+ {"item":"onion","qty":60,"unit":"g"},
+ {"item":"wholegrain bread","qty":60,"unit":"g"}],
+ kcal=520, protein_g=25, carbs_g=78, fat_g=8,
+ tags=["vegan","lunch","30-min","budget"]
+ ),
+ Meal(
+ name="Salmon, Potatoes & Greens",
+ ingredients=[{"item":"salmon fillet","qty":150,"unit":"g"},
+ {"item":"potatoes","qty":200,"unit":"g"},
+ {"item":"broccoli","qty":150,"unit":"g"},
+ {"item":"olive oil","qty":10,"unit":"ml"}],
+ kcal=680, protein_g=42, carbs_g=52, fat_g=30,
+ tags=["gluten-free","dinner","omega-3","fish"]
+ ),
+ Meal(
+ name="Cottage Cheese Bowl",
+ ingredients=[{"item":"low-fat cottage cheese","qty":200,"unit":"g"},
+ {"item":"pineapple","qty":150,"unit":"g"},
+ {"item":"chia seeds","qty":15,"unit":"g"}],
+ kcal=380, protein_g=32, carbs_g=35, fat_g=10,
+ tags=["vegetarian","snack","5-min","high-protein","dairy"]
+ ),
+]
+
+# ------------------------------
+# Tool implementations
+# ------------------------------
+ACTIVITY_FACTORS = {
+ "sedentary": 1.2,
+ "light": 1.375,
+ "moderate": 1.55,
+ "active": 1.725,
+ "very_active": 1.9
+}
+
+def mifflin_st_jeor(weight_kg: float, height_cm: float, age: int, sex: str) -> float:
+    # BMR (kcal/day) via the Mifflin-St Jeor equation; only the constant term
+    # differs by sex (+5 for male, -161 otherwise). Any sex string not starting
+    # with "m" is treated as female.
+    if sex.lower().startswith("m"):
+        return 10*weight_kg + 6.25*height_cm - 5*age + 5
+    else:
+        return 10*weight_kg + 6.25*height_cm - 5*age - 161
+
+def compute_targets(profile: UserProfile) -> MacroTargets:
+    """Compute TDEE and daily macro-gram targets from the user's profile.
+
+    BMR (Mifflin-St Jeor) is scaled by the activity factor (unknown levels
+    fall back to sedentary 1.2), adjusted for the goal, then split
+    30% protein / 40% carbs / 30% fat using 4/4/9 kcal per gram.
+    """
+    bmr = mifflin_st_jeor(profile.weight_kg, profile.height_cm, profile.age, profile.sex)
+    tdee = int(round(bmr * ACTIVITY_FACTORS.get(profile.activity_level, 1.2)))
+    # goal adjustment
+    if profile.goal == "lose":
+        target_kcal = max(1200, int(tdee - 400))  # conservative deficit, floored at 1200 kcal
+    elif profile.goal == "gain":
+        target_kcal = int(tdee + 300)
+    else:
+        target_kcal = tdee
+
+    # Macro split (modifiable): P 30%, C 40%, F 30%
+    protein_kcal = target_kcal * 0.30
+    carbs_kcal = target_kcal * 0.40
+    fat_kcal = target_kcal * 0.30
+    protein_g = int(round(protein_kcal / 4))   # 4 kcal per g of protein
+    carbs_g = int(round(carbs_kcal / 4))       # 4 kcal per g of carbs
+    fat_g = int(round(fat_kcal / 9))           # 9 kcal per g of fat
+
+    return MacroTargets(tdee=tdee, target_kcal=target_kcal,
+                        protein_g=protein_g, carbs_g=carbs_g, fat_g=fat_g)
+
+def _allowed(meal: Meal, profile: UserProfile) -> bool:
+ # dietary patterns/allergies/dislikes filters (simple; extend as needed)
+ diet = (profile.dietary_pattern or "").lower()
+ if diet == "vegetarian" and ("fish" in meal.tags or "poultry" in meal.tags):
+ return False
+ if diet == "vegan" and ("dairy" in meal.tags or "fish" in meal.tags or "poultry" in meal.tags):
+ return False
+ # allergies & dislikes
+ for a in profile.allergies:
+ if a and a.lower() in meal.name.lower(): return False
+ if any(a.lower() in (ing["item"]).lower() for ing in meal.ingredients): return False
+ if a.lower() in " ".join(meal.tags).lower(): return False
+ for d in profile.dislikes:
+ if d and d.lower() in meal.name.lower(): return False
+ if any(d.lower() in (ing["item"]).lower() for ing in meal.ingredients): return False
+ return True
+
+def meal_db_search(profile: UserProfile, tags: Optional[List[str]] = None) -> List[Meal]:
+ tags = tags or []
+ out = []
+ for m in RECIPE_DB:
+ if not _allowed(m, profile):
+ continue
+ if tags and not any(t in m.tags for t in tags):
+ continue
+ out.append(m)
+ return out or [] # may be empty; agent should handle
+
+def compose_meal_plan(profile: UserProfile, targets: MacroTargets) -> WeekPlan:
+    """Assemble a 7-day plan by picking, per slot, the allowed meal closest to the per-meal kcal budget.
+
+    NOTE(review): the closest-kcal `min` pick is deterministic, so every day
+    gets the same meals from a given pool (visible in demo_output.md); a larger
+    recipe DB or rotation logic is needed for variety.
+    """
+    # naive heuristic: pick meals that roughly match per-meal macro budget
+    per_meal_kcal = targets.target_kcal / profile.daily_meals
+    days = []
+    weekdays = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]
+
+    # simple pools
+    breakfasts = meal_db_search(profile, tags=["breakfast","no-cook","5-min"])
+    lunches = meal_db_search(profile, tags=["lunch","budget"])
+    dinners = meal_db_search(profile, tags=["dinner","high-protein"])
+
+    # fallback to any allowed meals if pools too small
+    allowed_all = meal_db_search(profile)
+    if len(breakfasts) < 2: breakfasts = allowed_all
+    if len(lunches) < 2: lunches = allowed_all
+    if len(dinners) < 2: dinners = allowed_all
+
+    for i, day in enumerate(weekdays):
+        day_meals = []
+        for slot in range(profile.daily_meals):
+            # slot 0 = breakfast, 1 = lunch, everything beyond = dinner pool
+            pool = breakfasts if slot == 0 else (lunches if slot == 1 else dinners)
+            # pick the meal closest in kcal to per_meal_kcal
+            pick = min(pool, key=lambda m: abs(m.kcal - per_meal_kcal))
+            day_meals.append(copy.deepcopy(pick))
+        # compute totals — MacroTargets is reused here with target_kcal = the day's actual kcal
+        kcal = sum(m.kcal for m in day_meals)
+        p = sum(m.protein_g for m in day_meals)
+        c = sum(m.carbs_g for m in day_meals)
+        f = sum(m.fat_g for m in day_meals)
+        day_targets = MacroTargets(tdee=targets.tdee, target_kcal=int(round(kcal)),
+                                   protein_g=p, carbs_g=c, fat_g=f)
+        days.append(DayPlan(day=day, meals=day_meals, totals=day_targets))
+    return WeekPlan(days=days, meta={"per_meal_target_kcal": int(round(per_meal_kcal))})
+
+def grocery_list_from_plan(plan: WeekPlan) -> List[Dict[str, Any]]:
+ # aggregate identical ingredients
+ agg: Dict[Tuple[str,str], float] = {}
+ units: Dict[Tuple[str,str], str] = {}
+ for d in plan.days:
+ for m in d.meals:
+ for ing in m.ingredients:
+ key = (ing["item"].lower(), ing.get("unit",""))
+ agg[key] = agg.get(key, 0) + float(ing.get("qty", 0))
+ units[key] = ing.get("unit","")
+ items = []
+ for (item, unit), qty in sorted(agg.items()):
+ items.append({"item": item, "qty": round(qty, 2), "unit": unit})
+ return items
+
+def swap_meal(plan: WeekPlan, day: str, meal_index: int, profile: UserProfile) -> WeekPlan:
+ # replace one meal by closest-kcal allowed alternative that isn't the same
+ day_idx = next((i for i,d in enumerate(plan.days) if d.day.lower().startswith(day[:3].lower())), None)
+ if day_idx is None: return plan
+ current_meal = plan.days[day_idx].meals[meal_index]
+ candidates = [m for m in meal_db_search(profile) if m.name != current_meal.name]
+ if not candidates: return plan
+ pick = min(candidates, key=lambda m: abs(m.kcal - current_meal.kcal))
+ plan.days[day_idx].meals[meal_index] = copy.deepcopy(pick)
+ # recalc day totals
+ d = plan.days[day_idx]
+ kcal = sum(m.kcal for m in d.meals)
+ p = sum(m.protein_g for m in d.meals)
+ c = sum(m.carbs_g for m in d.meals)
+ f = sum(m.fat_g for m in d.meals)
+ d.totals = MacroTargets(tdee=d.totals.tdee, target_kcal=kcal, protein_g=p, carbs_g=c, fat_g=f)
+ return plan
+
+# ------------------------------
+# Agent (LLM + tools)
+# ------------------------------
+SYS_PROMPT = """You are FitnessPlanner, an agentic planner that:
+- Respects dietary patterns, allergies, dislikes, budget, time limits.
+- Uses tools to compute targets, assemble a 7-day plan, produce a grocery list, and swap meals on request.
+- If a request is unsafe (extreme deficits, medical conditions), warn and suggest professional guidance.
+- Keep responses concise and structured (headings + bullet lists)."""
+
+# Tool registry for function-calling
+def get_tools_schema():
+    """Return OpenAI function-calling schemas for the four planner tools.
+
+    Parameter payloads are loosely typed as generic JSON objects; the Python
+    implementations validate them via the pydantic models in _call_tool.
+    """
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "calc_calories_and_macros",
+                "description": "Compute TDEE and macro targets from the user's profile.",
+                "parameters": {
+                    "type":"object",
+                    "properties": {"profile":{"type":"object"}},
+                    "required":["profile"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "compose_meal_plan",
+                "description": "Create a 7-day meal plan matching targets and constraints.",
+                "parameters": {
+                    "type":"object",
+                    "properties": {
+                        "profile":{"type":"object"},
+                        "targets":{"type":"object"}
+                    },
+                    "required":["profile","targets"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "grocery_list_from_plan",
+                "description": "Make a consolidated grocery list from a week plan.",
+                "parameters": {
+                    "type":"object",
+                    "properties": {"plan":{"type":"object"}},
+                    "required":["plan"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "swap_meal",
+                "description": "Swap a single meal in the plan while keeping macros reasonable.",
+                "parameters": {
+                    "type":"object",
+                    "properties": {
+                        "plan":{"type":"object"},
+                        "day":{"type":"string"},
+                        "meal_index":{"type":"integer","description":"0=breakfast,1=lunch,2=dinner"},
+                        "profile":{"type":"object"}
+                    },
+                    "required":["plan","day","meal_index","profile"]
+                }
+            }
+        }
+    ]
+
+class FitnessPlannerAgent:
+    def __init__(self, model: Optional[str] = None):
+        """Create the agent; `model` overrides the OPENAI_MODEL env var (default gpt-4o-mini)."""
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.model = model or os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+        # Caches of the most recent tool results, used by the Streamlit UI.
+        self.plan_cache: Optional[WeekPlan] = None
+        self.targets_cache: Optional[MacroTargets] = None
+
+ # Tool dispatch
+    # Tool dispatch
+    def _call_tool(self, name: str, args: Dict[str, Any]) -> str:
+        """Execute one named tool with JSON-decoded `args`; return a JSON string result.
+
+        Inputs are validated through the pydantic models (raises ValidationError
+        on malformed payloads). Plan/target results are cached on the agent for
+        the UI. Unknown tool names yield {"error": "unknown tool"}.
+        """
+        if name == "calc_calories_and_macros":
+            profile = UserProfile(**args["profile"])
+            targets = compute_targets(profile)
+            self.targets_cache = targets
+            return targets.model_dump_json()
+        elif name == "compose_meal_plan":
+            profile = UserProfile(**args["profile"])
+            targets = MacroTargets(**args["targets"])
+            plan = compose_meal_plan(profile, targets)
+            self.plan_cache = plan
+            return plan.model_dump_json()
+        elif name == "grocery_list_from_plan":
+            plan = WeekPlan(**args["plan"])
+            items = grocery_list_from_plan(plan)
+            return json.dumps(items)
+        elif name == "swap_meal":
+            plan = WeekPlan(**args["plan"])
+            profile = UserProfile(**args["profile"])
+            day = args["day"]
+            idx = args["meal_index"]
+            new_plan = swap_meal(plan, day, idx, profile)
+            self.plan_cache = new_plan
+            return new_plan.model_dump_json()
+        else:
+            return json.dumps({"error":"unknown tool"})
+
+ def chat(self, user_message: str, profile: Optional[UserProfile] = None) -> str:
+ messages = [{"role":"system","content":SYS_PROMPT}]
+ if profile:
+ messages.append({"role":"user","content":f"User profile: {profile.model_dump_json()}"} )
+ messages.append({"role":"user","content":user_message})
+
+ # First call
+ resp = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ tools=get_tools_schema(),
+ tool_choice="auto",
+ temperature=0.3
+ )
+
+ # Handle tool calls (simple, single-step or brief multi-step)
+ messages_llm = messages + [{"role":"assistant","content":resp.choices[0].message.content or "",
+ "tool_calls":resp.choices[0].message.tool_calls}]
+ if resp.choices[0].message.tool_calls:
+ for tc in resp.choices[0].message.tool_calls:
+ name = tc.function.name
+ args = json.loads(tc.function.arguments or "{}")
+ out = self._call_tool(name, args)
+ messages_llm.append({
+ "role":"tool",
+ "tool_call_id":tc.id,
+ "name":name,
+ "content":out
+ })
+
+ # Finalization
+ resp2 = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages_llm,
+ temperature=0.2
+ )
+ return resp2.choices[0].message.content
+
+ return resp.choices[0].message.content
+
+# ------------------------------
+# Quick CLI demo
+# ------------------------------
+if __name__ == "__main__":
+ profile = UserProfile(
+ sex="female", age=45, height_cm=152, weight_kg=62,
+ activity_level="light", goal="maintain",
+ dietary_pattern="vegetarian", allergies=[], dislikes=[],
+ daily_meals=3, cuisine_prefs=["mediterranean"], time_per_meal_minutes=25, budget_level="medium"
+ )
+ agent = FitnessPlannerAgent()
+ print(agent.chat("Create my 7-day plan and grocery list.", profile))
diff --git a/community-contributions/fitness-nutrition-planner-agent/app.py b/community-contributions/fitness-nutrition-planner-agent/app.py
new file mode 100644
index 0000000..a1f1102
--- /dev/null
+++ b/community-contributions/fitness-nutrition-planner-agent/app.py
@@ -0,0 +1,75 @@
+
+# app.py
+import json
+import streamlit as st
+from agent import FitnessPlannerAgent, UserProfile, WeekPlan
+
+st.set_page_config(page_title="Fitness & Nutrition Planner Agent", layout="wide")
+
+st.title("๐๏ธ Fitness & Nutrition Planner Agent")
+
+with st.sidebar:
+ st.header("Your Profile")
+ sex = st.selectbox("Sex", ["female","male"])
+ age = st.number_input("Age", 18, 90, 45)
+ height_cm = st.number_input("Height (cm)", 120, 220, 152)
+ weight_kg = st.number_input("Weight (kg)", 35.0, 200.0, 62.0)
+ activity_level = st.selectbox("Activity Level", ["sedentary","light","moderate","active","very_active"], index=1)
+ goal = st.selectbox("Goal", ["lose","maintain","gain"], index=1)
+ dietary_pattern = st.selectbox("Dietary Pattern", ["none","vegetarian","vegan","halal","kosher"], index=1)
+ if dietary_pattern == "none": dietary_pattern = None
+ allergies = st.text_input("Allergies (comma-separated)", "")
+ dislikes = st.text_input("Dislikes (comma-separated)", "")
+ daily_meals = st.slider("Meals per day", 2, 5, 3)
+ time_per_meal_minutes = st.slider("Time per meal (min)", 5, 90, 25)
+ budget_level = st.selectbox("Budget", ["medium","low","high"], index=0)
+ cuisine_prefs = st.text_input("Cuisine prefs (comma-separated)", "mediterranean")
+
+ build_btn = st.button("Generate 7-Day Plan")
+
+agent = FitnessPlannerAgent()
+
+if build_btn:
+ profile = UserProfile(
+ sex=sex, age=int(age), height_cm=float(height_cm), weight_kg=float(weight_kg),
+ activity_level=activity_level, goal=goal, dietary_pattern=dietary_pattern,
+ allergies=[a.strip() for a in allergies.split(",") if a.strip()],
+ dislikes=[d.strip() for d in dislikes.split(",") if d.strip()],
+ daily_meals=int(daily_meals), cuisine_prefs=[c.strip() for c in cuisine_prefs.split(",") if c.strip()],
+ time_per_meal_minutes=int(time_per_meal_minutes), budget_level=budget_level
+ )
+ st.session_state["profile_json"] = profile.model_dump_json()
+ with st.spinner("Planning your week..."):
+ result = agent.chat("Create my 7-day plan and grocery list.", profile)
+ st.session_state["last_response"] = result
+
+if "last_response" in st.session_state:
+ st.subheader("Plan & Groceries")
+ st.markdown(st.session_state["last_response"])
+
+st.divider()
+st.subheader("Tweaks")
+col1, col2, col3 = st.columns(3)
+with col1:
+ day = st.selectbox("Day to change", ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"])
+with col2:
+ meal_index = st.selectbox("Meal slot", ["Breakfast (0)","Lunch (1)","Dinner (2)"])
+ meal_index = int(meal_index[-2]) # 0/1/2
+with col3:
+ swap_btn = st.button("Swap Meal")
+
+if swap_btn and agent.plan_cache:
+ profile_json = st.session_state.get("profile_json")
+ if not profile_json:
+ st.warning("Please generate a plan first.")
+ else:
+ new_plan_json = agent._call_tool("swap_meal", {
+ "plan": agent.plan_cache.model_dump(),
+ "day": day,
+ "meal_index": meal_index,
+ "profile": json.loads(profile_json)
+ })
+ agent.plan_cache = WeekPlan(**json.loads(new_plan_json))
+ summary = agent.chat(f"Update summary for {day}: show the swapped meal and new day totals.")
+ st.session_state["last_response"] = summary
+ st.markdown(summary)
diff --git a/community-contributions/fitness-nutrition-planner-agent/demo_output.md b/community-contributions/fitness-nutrition-planner-agent/demo_output.md
new file mode 100644
index 0000000..841fb1e
--- /dev/null
+++ b/community-contributions/fitness-nutrition-planner-agent/demo_output.md
@@ -0,0 +1,84 @@
+
+# Demo Output (Sample Profile)
+
+**Profile**: female, 45, 152 cm, 62 kg, activity: light, goal: maintain, diet: vegetarian
+
+## Targets
+- TDEE ≈ **1680 kcal/day**
+- Macros (30/40/30): **Protein 126 g**, **Carbs 168 g**, **Fat 56 g**
+
+> These are estimates using Mifflin-St Jeor and a light activity factor. Not medical advice.
+
+---
+
+## Example 7-Day Plan (Breakfast / Lunch / Dinner)
+
+**Mon**
+- Greek Yogurt Parfait (380 kcal, 30P/52C/8F)
+- Lentil Soup + Wholegrain Bread (520 kcal, 25P/78C/8F)
+- Tofu Veggie Stir-Fry with Rice (650 kcal, 28P/85C/20F)
+- **Totals** โ 1550 kcal, 83P, 215C, 36F
+
+**Tue**
+- Cottage Cheese Bowl (380 kcal, 32P/35C/10F)
+- Lentil Soup + Wholegrain Bread (520 kcal, 25P/78C/8F)
+- Tofu Veggie Stir-Fry with Rice (650 kcal, 28P/85C/20F)
+- **Totals** โ 1550 kcal, 85P, 198C, 38F
+
+**Wed**
+- Greek Yogurt Parfait
+- Lentil Soup + Wholegrain Bread
+- Tofu Veggie Stir-Fry with Rice
+- **Totals** โ 1550 kcal
+
+**Thu**
+- Cottage Cheese Bowl
+- Lentil Soup + Wholegrain Bread
+- Tofu Veggie Stir-Fry with Rice
+- **Totals** โ 1550 kcal
+
+**Fri**
+- Greek Yogurt Parfait
+- Lentil Soup + Wholegrain Bread
+- Tofu Veggie Stir-Fry with Rice
+- **Totals** โ 1550 kcal
+
+**Sat**
+- Cottage Cheese Bowl
+- Lentil Soup + Wholegrain Bread
+- Tofu Veggie Stir-Fry with Rice
+- **Totals** โ 1550 kcal
+
+**Sun**
+- Greek Yogurt Parfait
+- Lentil Soup + Wholegrain Bread
+- Tofu Veggie Stir-Fry with Rice
+- **Totals** โ 1550 kcal
+
+> Notes: The demo DB is intentionally small. In practice, plug in a larger vegetarian recipe set for more variety. Add snacks if you'd like to reach ~1680 kcal/day.
+
+---
+
+## Grocery List (aggregated, approx for 7 days)
+
+- nonfat greek yogurt โ **1400 g**
+- berries โ **1050 g**
+- granola โ **210 g**
+- honey โ **70 g**
+- lentils (cooked) โ **1400 g**
+- vegetable broth โ **2800 ml**
+- carrot โ **560 g**
+- celery โ **420 g**
+- onion โ **420 g**
+- wholegrain bread โ **420 g**
+- firm tofu โ **1050 g**
+- mixed vegetables โ **1400 g**
+- soy sauce (low sodium) โ **105 ml**
+- olive oil โ **140 ml**
+- brown rice (cooked) โ **1400 g**
+- low-fat cottage cheese โ **600 g**
+- pineapple โ **450 g**
+- chia seeds โ **45 g**
+
+**Tip:** Use the appโs *Swap Meal* to replace any item (e.g., swap Wed dinner).
+
diff --git a/community-contributions/fitness-nutrition-planner-agent/sample_profile.json b/community-contributions/fitness-nutrition-planner-agent/sample_profile.json
new file mode 100644
index 0000000..0f54a9d
--- /dev/null
+++ b/community-contributions/fitness-nutrition-planner-agent/sample_profile.json
@@ -0,0 +1,17 @@
+{
+ "sex": "female",
+ "age": 45,
+ "height_cm": 152,
+ "weight_kg": 62,
+ "activity_level": "light",
+ "goal": "maintain",
+ "dietary_pattern": "vegetarian",
+ "allergies": [],
+ "dislikes": [],
+ "daily_meals": 3,
+ "cuisine_prefs": [
+ "mediterranean"
+ ],
+ "time_per_meal_minutes": 25,
+ "budget_level": "medium"
+}
\ No newline at end of file
diff --git a/community-contributions/openai-twenty-questions/README.md b/community-contributions/openai-twenty-questions/README.md
new file mode 100644
index 0000000..3fcbd94
--- /dev/null
+++ b/community-contributions/openai-twenty-questions/README.md
@@ -0,0 +1,5 @@
+# openai-twenty-questions
+Chatgpt script that acts as Thinker and Guesser to play the 20 questions game
+
+1. pip install openai==0.28
+2. Run python twenty.py and it will Think of a word and try to guess it in 20 tries
diff --git a/community-contributions/openai-twenty-questions/twenty.py b/community-contributions/openai-twenty-questions/twenty.py
new file mode 100644
index 0000000..5f098cd
--- /dev/null
+++ b/community-contributions/openai-twenty-questions/twenty.py
@@ -0,0 +1,100 @@
+import openai
+import os
+import time
+
+# openai.api_key = os.getenv("OPENAI_API_KEY")
+# openai.api_key = "<>"
+
+# Models: You can use "gpt-4o", "gpt-4-turbo", or "gpt-3.5-turbo" โ but we'll use "gpt-4o" or "gpt-4o-mini" for both players
+MODEL = "gpt-4o-mini"
+
+def call_chatgpt(messages):
+ response = openai.ChatCompletion.create(
+ model=MODEL,
+ messages=messages,
+ temperature=0.7
+ )
+ return response.choices[0].message["content"].strip()
+
+# Step 1: Thinker chooses a secret object
+thinker_messages = [
+ {"role": "system", "content": "You are playing 20 Questions. Think of an object or thing and just one word. Keep it secret and reply only with: 'I have thought of something. Let's begin.'"},
+]
+thinker_reply = call_chatgpt(thinker_messages)
+print("Thinker:", thinker_reply)
+
+# For simulation purposes, letโs ask the thinker what the object is (in real game, this is hidden)
+reveal_object_prompt = [
+ {"role": "system", "content": "You are playing 20 Questions. Think of an object or thing and just one word. Now tell me (just for logging) what you are thinking of. Reply only with the thing."}
+]
+object_answer = call_chatgpt(reveal_object_prompt)
+print("๐ Secret Object:", object_answer)
+
+# Step 2: Guesser starts asking questions
+guesser_messages = [
+ {"role": "system", "content": f"You are playing 20 Questions. Ask yes/no questions to figure out what the object is. Do not repeat questions. The object is kept secret by the other player. Begin by asking your first question."},
+]
+
+# Letโs keep track of Q&A
+history = []
+q_count = 1
+
+for i in range(1, 11):
+ print(f"\n๐ Round {q_count}")
+ q_count += 1
+ # Guesser asks a question
+ question = call_chatgpt(guesser_messages)
+ print("Guesser:", question)
+ history.append(("Guesser", question))
+
+ # Thinker responds (yes/no)
+ thinker_round = [
+ {"role": "system", "content": f"You are playing 20 Questions. The secret object is: {object_answer}."},
+ {"role": "user", "content": f"The other player asked: {question}. Respond only with 'Yes', 'No', or 'I don't know'."}
+ ]
+ answer = call_chatgpt(thinker_round)
+ print("Thinker:", answer)
+ history.append(("Thinker", answer))
+
+ # Add to conversation history for guesser
+ guesser_messages.append({"role": "assistant", "content": question})
+ guesser_messages.append({"role": "user", "content": answer})
+
+
+ print(f"\n๐ Round {q_count}")
+ q_count += 1
+ # Check if guesser wants to guess
+ guess_check_prompt = guesser_messages + [
+ {"role": "user", "content": "Based on the answers so far, do you want to guess? If yes, say: 'Is it ?'. If not, ask the next yes/no question."}
+ ]
+ next_move_question = call_chatgpt(guess_check_prompt)
+ print("Guesser next move:", next_move_question)
+ history.append(("Guesser", next_move_question))
+
+ if next_move_question.lower().startswith("is it a"):
+ # Thinker validates guess
+ guess = next_move_question[8:].strip(" ?.")
+ guess = next_move_question[8:].strip(" ?")
+
+ if guess.lower() == object_answer.lower():
+ print("Guesser guessed correctly!")
+ break
+ # Thinker responds (yes/no)
+ thinker_round = [
+ {"role": "system", "content": f"You are playing 20 Questions. The secret object is: {object_answer}."},
+ {"role": "user", "content": f"The other player asked: {next_move_question}. Respond only with 'Yes', 'No', or 'I don't know'."}
+ ]
+ answer = call_chatgpt(thinker_round)
+ print("Thinker next move:", answer)
+ history.append(("Thinker", answer))
+
+ # Add to conversation history for guesser
+ guesser_messages.append({"role": "assistant", "content": next_move_question})
+ guesser_messages.append({"role": "user", "content": answer})
+
+ # Prepare for next round
+ guesser_messages.append({"role": "assistant", "content": next_move_question})
+ question = next_move_question
+
+else:
+ print("โ Guesser used all 20 questions without guessing correctly.")
\ No newline at end of file
diff --git a/community-contributions/playwright-enhanced-scraper/enhanced_web_scraper.ipynb b/community-contributions/playwright-enhanced-scraper/enhanced_web_scraper.ipynb
new file mode 100644
index 0000000..8e7baf6
--- /dev/null
+++ b/community-contributions/playwright-enhanced-scraper/enhanced_web_scraper.ipynb
@@ -0,0 +1,731 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
+ "metadata": {},
+ "source": [
+ "# YOUR FIRST LAB\n",
+ "### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
+ "\n",
+ "## Your first Frontier LLM Project\n",
+ "\n",
+ "Let's build a useful LLM solution - in a matter of minutes.\n",
+ "\n",
+ "By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
+ "\n",
+ "Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
+ "\n",
+ "Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
+ "\n",
+ "## If you're new to Jupyter Lab\n",
+ "\n",
+ "Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
+ "\n",
+ "I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
+ "\n",
+ "## If you're new to the Command Line\n",
+ "\n",
+ "Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
+ "\n",
+ "## If you'd prefer to work in IDEs\n",
+ "\n",
+ "If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
+ "If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
+ "\n",
+ "## If you'd like to brush up your Python\n",
+ "\n",
+ "I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
+ "`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
+ "\n",
+ "## I am here to help\n",
+ "\n",
+ "If you have any problems at all, please do reach out. \n",
+ "I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
+ "And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😊 \n",
+ "\n",
+ "## More troubleshooting\n",
+ "\n",
+ "Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
+ "\n",
+ "## For foundational technical knowledge (eg Git, APIs, debugging) \n",
+ "\n",
+ "If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😊 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
+ "\n",
+ "This covers Git and GitHub; what they are, the difference, and how to use them: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
+ "\n",
+ "This covers technical foundations: \n",
+ "ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
+ "\n",
+ "This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
+ "\n",
+ "This covers the essential techniques for figuring out errors: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
+ "\n",
+ "And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
+ "\n",
+ "## If this is old hat!\n",
+ "\n",
+ "If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Please read - important note\n",
+ " The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, after watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " This code is a live resource - keep an eye out for my emails\n",
+ " I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.
\n",
+ " I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Business value of these exercises\n",
+ " A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.\n",
+ " | \n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# If you get an error running this cell, then please head over to the troubleshooting notebook!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
+ "metadata": {},
+ "source": [
+ "# Connecting to OpenAI (or Ollama)\n",
+ "\n",
+ "The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
+ "\n",
+ "If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
+ "\n",
+ "## Troubleshooting if you have problems:\n",
+ "\n",
+ "Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
+ "\n",
+ "If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
+ "\n",
+ "Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
+ "\n",
+ "Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
+ "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
+ "metadata": {},
+ "source": [
+ "# Let's make a quick call to a Frontier model to get started, as a preview!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
+ "\n",
+ "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2aa190e5-cb31-456a-96cc-db109919cd78",
+ "metadata": {},
+ "source": [
+ "## OK onwards with our first project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5e793b2-6775-426a-a139-4848291d0463",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's try one out. Change the website and add print statements to follow along.\n",
+ "\n",
+ "ed = Website(\"https://edwarddonner.com\")\n",
+ "print(ed.title)\n",
+ "print(ed.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
+ "metadata": {},
+ "source": [
+ "## Types of prompts\n",
+ "\n",
+ "You may know this already - but if not, you will get very familiar with it!\n",
+ "\n",
+ "Models like GPT4o have been trained to receive instructions in a particular way.\n",
+ "\n",
+ "They expect to receive:\n",
+ "\n",
+ "**A system prompt** that tells them what task they are performing and what tone they should use\n",
+ "\n",
+ "**A user prompt** -- the conversation starter that they should reply to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
+ "\n",
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(user_prompt_for(ed))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
+ "metadata": {},
+ "source": [
+ "## Messages\n",
+ "\n",
+ "The API from OpenAI expects to receive messages in a particular structure.\n",
+ "Many of the other APIs share this structure:\n",
+ "\n",
+ "```python\n",
+ "[\n",
+ " {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
+ " {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
+ "]\n",
+ "```\n",
+ "To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
+ " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To give you a preview -- calling OpenAI with system and user messages:\n",
+ "\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
+ "metadata": {},
+ "source": [
+ "## And now let's build useful messages for GPT-4o-mini, using a function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# See how this function creates exactly the format above\n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Try this out, and then try for a few more websites\n",
+ "\n",
+ "messages_for(ed)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
+ "metadata": {},
+ "source": [
+ "## Time to bring it together - the API for OpenAI is very simple!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And now: call the OpenAI API. You will get very familiar with this!\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "summarize(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d926d59-450e-4609-92ba-2d6f244f1342",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function to display this nicely in the Jupyter output, using markdown\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3018853a-445f-41ff-9560-d925d1774b2f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
+ "metadata": {},
+ "source": [
+ "# Let's try more websites\n",
+ "\n",
+ "Note that this will only work on websites that can be scraped using this simplistic approach.\n",
+ "\n",
+ "Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
+ "\n",
+ "Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
+ "\n",
+ "But many websites will work just fine!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45d83403-a24c-44b5-84ac-961449b4008f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://cnn.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75e9fd40-b354-4341-991e-863ef2e59db7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://anthropic.com\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Business applications\n",
+ " In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
+ "\n",
+ "More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.\n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Before you continue - now try yourself\n",
+ " Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.\n",
+ " | \n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "\n",
+ "system_prompt = \"something here\"\n",
+ "user_prompt = \"\"\"\n",
+ " Lots of text\n",
+ " Can be pasted here\n",
+ "\"\"\"\n",
+ "\n",
+ "# Step 2: Make the messages list\n",
+ "\n",
+ "messages = [] # fill this in\n",
+ "\n",
+ "# Step 3: Call OpenAI\n",
+ "\n",
+ "response =\n",
+ "\n",
+ "# Step 4: print the result\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
+ "metadata": {},
+ "source": [
+ "## An extra exercise for those who enjoy web scraping\n",
+ "\n",
+ "You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
+ "metadata": {},
+ "source": [
+ "# Sharing your code\n",
+ "\n",
+ "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
+ "\n",
+ "If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
+ "\n",
+ "Here are good instructions courtesy of an AI friend: \n",
+ "https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from playwright.sync_api import sync_playwright\n",
+ "import time \n",
+ "import asyncio\n",
+ "from playwright.async_api import async_playwright\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd3fdc92",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "44099289",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Website:\n",
+ " def __init__(self, url):\n",
+ " self.url = url\n",
+ " self.title = None\n",
+ " self.text = None\n",
+ "\n",
+ " @classmethod\n",
+ " async def create(cls, url):\n",
+ " website = cls(url)\n",
+ " retries = 3 # Add retry logic\n",
+ " for attempt in range(retries):\n",
+ " try:\n",
+ " await website.initialize()\n",
+ " return website\n",
+ " except TimeoutError as e:\n",
+ " if attempt == retries - 1: # Last attempt\n",
+ " raise\n",
+ " print(f\"Attempt {attempt + 1} failed, retrying...\")\n",
+ " await asyncio.sleep(2) # Wait between retries\n",
+ "\n",
+ " async def initialize(self):\n",
+ " async with async_playwright() as p:\n",
+ " # Launch with stealth mode settings\n",
+ " browser = await p.chromium.launch(\n",
+ " headless=True,\n",
+ " args=[\n",
+ " '--disable-blink-features=AutomationControlled',\n",
+ " '--disable-dev-shm-usage',\n",
+ " '--no-sandbox'\n",
+ " ]\n",
+ " )\n",
+ " \n",
+ " # Create context with stealth settings\n",
+ " context = await browser.new_context(\n",
+ " user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',\n",
+ " viewport={'width': 1920, 'height': 1080},\n",
+ " java_script_enabled=True,\n",
+ " bypass_csp=True, # Bypass Content Security Policy\n",
+ " extra_http_headers={\n",
+ " 'Accept-Language': 'en-US,en;q=0.9',\n",
+ " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'\n",
+ " }\n",
+ " )\n",
+ " \n",
+ " page = await context.new_page()\n",
+ " \n",
+ " try:\n",
+ " # Navigate with progressive waits\n",
+ " await page.goto(self.url, timeout=90000) # 90 second timeout\n",
+ " \n",
+ " # Wait for either real content or Cloudflare challenge\n",
+ " try:\n",
+ " # Wait for actual content first\n",
+ " await page.wait_for_selector('main', timeout=10000)\n",
+ " except:\n",
+ " # If main content not found, wait for Cloudflare to clear\n",
+ " await page.wait_for_load_state('networkidle', timeout=30000)\n",
+ " await page.wait_for_selector('body', state='visible', timeout=30000)\n",
+ " \n",
+ " # Get content after all waits\n",
+ " self.title = await page.title()\n",
+ " content = await page.content()\n",
+ " \n",
+ " soup = BeautifulSoup(content, 'html.parser')\n",
+ " for irrelevant in soup.find_all([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True) if soup.body else \"\"\n",
+ " \n",
+ " finally:\n",
+ " await browser.close()\n",
+ "\n",
+ "# Modified summarize function to use the factory method\n",
+ "async def summarize(url):\n",
+ " website = await Website.create(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=\"gpt-4o-mini\",\n",
+ " messages=messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content\n",
+ "\n",
+ "# Display function remains the same\n",
+ "async def display_summary(url):\n",
+ " summary = await summarize(url)\n",
+ " display(Markdown(summary))\n",
+ "\n",
+ "# Usage\n",
+ "await display_summary(\"https://openai.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78e0d270",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/environment.yml b/environment.yml
index 470b64b..3d39950 100644
--- a/environment.yml
+++ b/environment.yml
@@ -44,3 +44,4 @@ dependencies:
- twilio
- pydub
- protobuf==3.20.2
+ - wandb
diff --git a/requirements.txt b/requirements.txt
index edcb3de..9d62d7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,3 +36,4 @@ speedtest-cli
sentence_transformers
feedparser
protobuf==3.20.2
+wandb
diff --git a/week1/community-contributions/Business_Use_Case_Resume_Upgrader.ipynb b/week1/community-contributions/Business_Use_Case_Resume_Upgrader.ipynb
new file mode 100644
index 0000000..173dafd
--- /dev/null
+++ b/week1/community-contributions/Business_Use_Case_Resume_Upgrader.ipynb
@@ -0,0 +1,179 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "606e9c73-50fe-46b9-8df3-ae2246c00a3e",
+ "metadata": {},
+ "source": [
+ "# Business Use Case - LLM based Resume Upgrader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "919f6546-80ec-4d4c-8a80-00228f50e4a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "from openai import OpenAI\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2f5b02c-f782-4578-8a91-07891c39ceb0",
+ "metadata": {},
+ "source": [
+ "steps to perform\n",
+ "-> load API key from env file\n",
+ "-> create a function to call llm api\n",
+ "-> create messages for system prompt and user prompt\n",
+ "-> display the llm output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "31aaa20e-4996-43cb-b43a-a1aef80fd391",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv()\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "# error handling\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92f65c91-ca7f-47e6-9fd7-d63b278ba264",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "98fc7bac-07c8-4801-9225-8f843837f3c2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# system prompt\n",
+ "\n",
+ "system_prompt = \"\"\"You are a helpful resume editor assistant that provides required assistance in changing a resume to match the given job description role \\\n",
+ "You are given a resume and job description, your job is to understand the resume and job description to suggest up to 6 missing key words in the resume. Then you have to \n",
+ "suggest how the user can improve his resume by giving up to 3 example sentences using the suggested keywords to fit into their resume.\n",
+ "by using the following structure provide your response \\\n",
+ "Structure:\n",
+ "Job role : [Job Role]:\n",
+ "Candidate Name : [Candidate Name]\n",
+ "Missing Key words in Resume Based on Given job description:\n",
+ " - [] Missing key words\n",
+ " -[] Missing key words\n",
+ "\n",
+ "\n",
+ "Suggestion:\n",
+ " - [] # write a sentence including the key words to put them in the resume\n",
+ " - [] # write a sentence including the key words to put them in the resume\n",
+ "\n",
+ "Guidelines:\n",
+ "- give proper keyword suggestions which are essential for the job function. Do not give any unnecessary suggestions\n",
+ "- Keep the suggested sentences less than 50 words\n",
+ "\n",
+ "\"\"\"\n",
+ "user_prompt = f'Give me suggestions on how to improve my resume and for the given job description '\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0d9c40b5-8e27-41b9-8b88-2c83e7d2b3ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# call openai api\n",
+ "def resume_upgrader(resume:str, job_description:str):\n",
+ " user_prompt = f'Give me suggestions on how to improve my resume {resume} and for the given job description {job_description}'\n",
+ " messages = [\n",
+ " {'role': 'system', 'content': system_prompt},\n",
+ " {'role': 'user', 'content': user_prompt}\n",
+ " ]\n",
+ " try:\n",
+ " \n",
+ " response = openai.chat.completions.create(model =\"gpt-4o-mini\", messages = messages)\n",
+ " return response.choices[0].message.content\n",
+ " except:\n",
+ "        print('Error while getting the response from the API')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5aa29465-c119-4178-90f1-3ebdc9eeb11a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def print_api_response(response_markdown):\n",
+ " \"\"\"Print the markdown response\"\"\"\n",
+ " display(Markdown(response_markdown))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82a92034-6722-4e78-a901-b4ef2b9cbb84",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "resume = input(\"Paste your resume in here\")\n",
+ "job_description = input(\"Paste your job description here\")\n",
+ "response = resume_upgrader(resume, job_description)\n",
+ "print_api_response(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d0be536f-e890-473f-8c68-767bc0e3b47c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/D2-property-rental-assistant/README.md b/week1/community-contributions/D2-property-rental-assistant/README.md
new file mode 100644
index 0000000..374cd58
--- /dev/null
+++ b/week1/community-contributions/D2-property-rental-assistant/README.md
@@ -0,0 +1,189 @@
+# AI Property Rental Assistant
+
+An intelligent property rental assistant Jupyter notebook that scrapes real estate listings from OnTheMarket and uses a local LLM (DeepSeek R1) to analyze and recommend properties based on user requirements.
+
+## Features
+
+- **Web Scraping**: Automatically fetches property listings from OnTheMarket
+- **AI-Powered Analysis**: Uses DeepSeek R1 model via Ollama for intelligent recommendations
+- **Personalized Recommendations**: Filters and ranks properties based on:
+ - Budget constraints
+ - Number of bedrooms
+ - Tenant type (student, family, professional)
+ - Location preferences
+- **Clean Output**: Returns formatted markdown with top 3-5 property recommendations
+- **Smart Filtering**: Handles cases where no suitable properties are found with helpful suggestions
+
+## Prerequisites
+
+- Python 3.7+
+- Ollama installed and running locally
+- DeepSeek R1 14B model pulled in Ollama
+
+## Installation
+
+1. **Clone the repository**
+```bash
+git clone
+cd property-rental-assistant
+```
+
+2. **Install required Python packages**
+```bash
+pip install requests beautifulsoup4 ollama ipython jupyter
+```
+
+3. **Install and setup Ollama**
+```bash
+# Install Ollama (macOS/Linux)
+curl -fsSL https://ollama.ai/install.sh | sh
+
+# For Windows, download from: https://ollama.ai/download
+```
+
+4. **Pull the DeepSeek R1 model**
+```bash
+ollama pull deepseek-r1:14b
+```
+
+5. **Start Ollama server**
+```bash
+ollama serve
+```
+
+## Usage
+
+### Running the Notebook
+
+1. **Start Jupyter Notebook**
+```bash
+jupyter notebook
+```
+
+2. **Open the notebook**
+Navigate to `property_rental_assistant.ipynb` in the Jupyter interface
+
+3. **Run all cells**
+Click `Cell` → `Run All` or use `Shift + Enter` to run cells individually
+
+### Customizing Search Parameters
+
+Modify the `user_needs` variable in the notebook:
+```python
+user_needs = "I'm a student looking for a 2-bedroom house in Durham under £2,000/month"
+```
+
+Other examples:
+- `"Family of 4 looking for 3-bedroom house with garden in Durham, budget £2,500/month"`
+- `"Professional couple seeking modern 1-bed apartment near city center, max £1,500/month"`
+- `"Student group needs 4-bedroom house near Durham University, £600/month per person"`
+
+### Changing the Property Website
+
+Update the `website_url` variable in the notebook:
+```python
+website_url = "https://www.onthemarket.com/to-rent/property/durham/"
+```
+
+## Architecture
+
+```
+โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโ
+โ OnTheMarket โโโโโโถโ Web Scraper โโโโโโถโ Ollama โ
+โ Website โ โ (BeautifulSoup)โ โ (DeepSeek R1)โ
+โโโโโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโ
+ โ
+ โผ
+ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+ โ AI-Generated Recommendations โ
+ โ โข Top 5 matching properties โ
+ โ โข Filtered by requirements โ
+ โ โข Markdown formatted output โ
+ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
+```
+
+## Project Structure
+
+```
+property-rental-assistant/
+โ
+โโโ property_rental_assistant.ipynb # Main Jupyter notebook
+โโโ README.md # This file
+```
+
+## ๐ง Configuration
+
+### Ollama API Settings
+```python
+OLLAMA_API = "http://localhost:11434/api/chat" # Default Ollama endpoint
+MODEL = "deepseek-r1:14b" # Model to use
+```
+
+### Web Scraping Settings
+```python
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+}
+timeout = 10 # Request timeout in seconds
+```
+
+### Content Limits
+```python
+website.text[:4000] # Truncate content to 4000 chars for token limits
+```
+
+## How It Works
+
+1. **Web Scraping**: The `Website` class fetches and parses HTML content from the property listing URL
+2. **Content Cleaning**: Removes scripts, styles, and images to extract clean text
+3. **Prompt Engineering**: Combines system prompt with user requirements and scraped data
+4. **LLM Analysis**: Sends the prompt to DeepSeek R1 via Ollama API
+5. **Recommendation Generation**: The AI analyzes listings and returns top matches in markdown format
+
+## ๐ ๏ธ Troubleshooting
+
+### Ollama Connection Error
+```
+Error communicating with Ollama: [Errno 111] Connection refused
+```
+**Solution**: Ensure Ollama is running with `ollama serve`
+
+### Model Not Found
+```
+Error: model 'deepseek-r1:14b' not found
+```
+**Solution**: Pull the model with `ollama pull deepseek-r1:14b`
+
+### Web Scraping Blocked
+```
+Error fetching website: 403 Forbidden
+```
+**Solution**: The website may be blocking automated requests. Try:
+- Updating the User-Agent string
+- Adding delays between requests
+- Using a proxy or VPN
+
+### Insufficient Property Data
+If recommendations are poor quality, the scraper may not be capturing listing details properly. Check:
+- The website structure hasn't changed
+- The content truncation limit (4000 chars) isn't too restrictive
+
+## Future Enhancements
+
+- [ ] Support multiple property websites (Rightmove, Zoopla, SpareRoom)
+- [ ] Interactive CLI for dynamic user input
+- [ ] Property image analysis
+- [ ] Save search history and favorite properties
+- [ ] Email notifications for new matching properties
+- [ ] Price trend analysis
+- [ ] Commute time calculations to specified locations
+- [ ] Multi-language support
+- [ ] Web interface with Flask/FastAPI
+- [ ] Docker containerization
+
+## Acknowledgments
+
+- [Ollama](https://ollama.ai/) for local LLM hosting
+- [DeepSeek](https://www.deepseek.com/) for the R1 model
+- [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for web scraping
+- [OnTheMarket](https://www.onthemarket.com/) for property data
diff --git a/week1/community-contributions/D2-property-rental-assistant/day2.ipynb b/week1/community-contributions/D2-property-rental-assistant/day2.ipynb
new file mode 100644
index 0000000..4c8dc5e
--- /dev/null
+++ b/week1/community-contributions/D2-property-rental-assistant/day2.ipynb
@@ -0,0 +1,217 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57112e5c-7b0f-4ba7-9022-ae21e8ac0f42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b71a051-fc0e-46a9-8b1b-b58f685e800d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Constants\n",
+ "OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
+ "HEADERS = {\"Content-Type\": \"application/json\"}\n",
+ "MODEL = \"deepseek-r1:14b\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ed3be9dc-d459-46ac-a8eb-f9b932c4302f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ " def __init__(self, url):\n",
+ " self.url = url\n",
+ " try:\n",
+ " response = requests.get(url, headers=headers, timeout=10)\n",
+ " response.raise_for_status()\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " if soup.body:\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+ " else:\n",
+ " self.text = \"No body content found\"\n",
+ " except requests.RequestException as e:\n",
+ " print(f\"Error fetching website: {e}\")\n",
+ " self.title = \"Error loading page\"\n",
+ " self.text = \"Could not load page content\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "17ea76f8-38d9-40b9-8aba-eb957d690a0d",
+ "metadata": {},
+ "source": [
+ "## Without Ollama package"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a6fd698-8e59-4cd7-bb53-b9375e50f899",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def house_renting(system_prompt, user_prompt):\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ " payload = {\n",
+ " \"model\": MODEL,\n",
+ " \"messages\": messages,\n",
+ " \"stream\": False\n",
+ " }\n",
+ " response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
+ " return response.json()['message']['content']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c826a52c-d1d3-493a-8b7c-6e75b848b453",
+ "metadata": {},
+ "source": [
+ "## Introducing Ollama package "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "519e27da-eeff-4c1b-a8c6-e680fdf01da2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ollama\n",
+ "\n",
+ "def house_renting_ollama(system_prompt, user_prompt):\n",
+ " try:\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ " response = ollama.chat(model=MODEL, messages=messages)\n",
+ " return response['message']['content']\n",
+ " except Exception as e:\n",
+ " return f\"Error communicating with Ollama: {e}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60e98b28-06d9-4303-b8ca-f7b798244eb4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"\"\"\n",
+ "You are a helpful real estate assistant specializing in UK property rentals. Your job is to guide users in finding houses to rent, especially in Durham. Follow these rules:\n",
+ "1. Always ask clarifying questions if user input is vague. Determine location, budget, number of bedrooms, and tenant type (e.g. student, family, professional).\n",
+ "2. Use structured data provided from the website (like property listings) to identify relevant options.\n",
+ "3. If listings are provided, filter and rank them based on the user's preferences.\n",
+ "4. Recommend up to 5 top properties with rent price, bedroom count, key features, and location.\n",
+ "5. Always respond in markdown with clean formatting using headers, bold text, and bullet points.\n",
+ "6. If no listings match well, provide tips (e.g. \"try adjusting your budget or search radius\").\n",
+ "7. Stay concise, helpful, and adapt to whether the user is a student, family, couple, or solo tenant.\n",
+ "\"\"\"\n",
+ "\n",
+ "def user_prompt_for_renting(website, user_needs):\n",
+ " return f\"\"\"\n",
+ "I want to rent a house and here's what I'm looking for:\n",
+ "{user_needs}\n",
+ "\n",
+ "Here are the property listings I found on the website titled: \"{website.title}\".\n",
+ "\n",
+ "Please analyze them and recommend the best 3โ5 options that match my needs. If none are suitable, tell me why and offer suggestions.\n",
+ "\n",
+ "The page content is below:\n",
+ "{website.text[:4000]}\n",
+ "\"\"\" # content is truncated for token limits"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef420f4b-e3d2-4fbd-bf6f-811f2c8536e0",
+ "metadata": {},
+ "source": [
+ "## Ollama Package"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1cf128af-4ece-41ab-b353-5c8564c7de1d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if __name__ == \"__main__\": \n",
+ " print(\"Starting AI Property Rental Assistant...\")\n",
+ " print(\"=\" * 50)\n",
+ " \n",
+ " website_url = \"https://www.onthemarket.com/to-rent/property/durham/\"\n",
+ " print(f\"๐ Scraping properties from: {website_url}\")\n",
+ " \n",
+ " website = Website(website_url)\n",
+ " print(f\"Website Title: {website.title}\")\n",
+ " print(f\"Content Length: {len(website.text)} characters\")\n",
+ " print(f\"Successfully scraped property listings\\n\")\n",
+ " \n",
+ " user_needs = \"I'm a student looking for a 2-bedroom house in Durham under ยฃ2,000/month\"\n",
+ " print(f\"User Requirements: {user_needs}\\n\")\n",
+ " \n",
+ " user_prompt = user_prompt_for_renting(website, user_needs)\n",
+ " print(\"Generating AI recommendations...\")\n",
+ " \n",
+ " # Choose which method to use (comment out the one you don't want)\n",
+ " \n",
+ " # Method 1: Using ollama Python library\n",
+ " output = house_renting_ollama(system_prompt, user_prompt)\n",
+ " \n",
+ " # Method 2: Using direct API call\n",
+ " # output = house_renting(system_prompt, user_prompt)\n",
+ " \n",
+ " display(Markdown(output))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:llms]",
+ "language": "python",
+ "name": "conda-env-llms-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/Day-1_email_summarizers.ipynb b/week1/community-contributions/Day-1_email_summarizers.ipynb
new file mode 100644
index 0000000..d2a4597
--- /dev/null
+++ b/week1/community-contributions/Day-1_email_summarizers.ipynb
@@ -0,0 +1,103 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d7a6bb51",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import library\n",
+ "from openai import OpenAI\n",
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "\n",
+ "# Load your API key from an .env file\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ac4cdf9",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "system_prompt = \"you are a helpful assistant that suggests an appropriate short subject line for an email based on its contents.\"\n",
+ "\n",
+ "user_prompt = \"\"\"\n",
+ "Hi John,\n",
+ "I hope this email finds you well. I wanted to follow up on our meeting last week regarding the quarterly budget proposal.\n",
+ "After reviewing the numbers with my team, we've identified some areas where we can reduce costs by approximately 15% without impacting our core operations. This would involve consolidating some vendor contracts and optimizing our software licensing.\n",
+ "Could we schedule a meeting next week to discuss these findings in detail? I'm available Tuesday through Thursday afternoon.\n",
+ "Looking forward to hearing from you.\n",
+ "\n",
+ "Best regards,\n",
+ "Sarah\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a77ca09e",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 2: Make the messages list\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8404f0fe",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 3: Call OpenAI\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a4875f7",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 4: Print the result\n",
+ "print(response.choices[0].message.content)"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/Day-2_exercise_with_ollama3.ipynb b/week1/community-contributions/Day-2_exercise_with_ollama3.ipynb
new file mode 100644
index 0000000..1168770
--- /dev/null
+++ b/week1/community-contributions/Day-2_exercise_with_ollama3.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "135717e7",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "import ollama"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "29a9e634",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# OPTION 1\n",
+ "# using openai\n",
+ "\n",
+ "# message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
+ "# client = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"not-needed\")\n",
+ "# response = openai.chat.completions.create(model=``, messages=[{\"role\":\"user\", \"content\":message}])\n",
+ "# print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "306993ed",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# OPTION 2\n",
+ "# using Ollama\n",
+ "\n",
+ "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
+ "model=\"llama3\"\n",
+ "response=ollama.chat(model=model,messages=[{\"role\":\"user\",\"content\":message}])\n",
+ "print(response[\"message\"][\"content\"])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "856f767b",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "4ce558dc",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Let's try one out. Change the website and add print statements to follow along.\n",
+ "\n",
+ "ed = Website(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5e3956f8",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
+ "\n",
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "99d791b4",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# A function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "5d89b748",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# See how this function creates exactly the format above\n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "9a97d3e2",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# And now: call the OpenAI API. You will get very familiar with this!\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response=ollama.chat(model=model,messages=messages_for(website))\n",
+ " return(response[\"message\"][\"content\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ec13fe0a",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "summarize(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "e3ade092",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# A function to display this nicely in the Jupyter output, using markdown\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be2d49e6",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1ccbf33b",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://cnn.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ae3d0eae",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://anthropic.com\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/Top Tech products.ipynb b/week1/community-contributions/Top Tech products.ipynb
new file mode 100644
index 0000000..53b4841
--- /dev/null
+++ b/week1/community-contributions/Top Tech products.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbd8585e-0a28-4fd9-80b5-690569f93e16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This notebook helps you get top tech products by providing a category and subcategory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df039118-f462-4a8b-949e-53d3a726e292",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2ffd2e5-d061-446c-891e-15a6d1958ab6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92e26007-521f-4ea2-9df9-edd77dd7e183",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "27d21593-8feb-42e4-bbc0-2e949b51137d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def tech_product(category_subcategory_budget):\n",
+ " parts = category_subcategory_budget.split('_')\n",
+ " return f\"{parts[0]}-{parts[1]}-{parts[2]}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dd978d25-5b84-4122-af7c-116f2bf72179",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def messages_for(products):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": \"you are a tech product expert and you need to suggest the best suited product available in India basis the input received in the form of category-subcategory-budget (in inr),\\\n",
+ " revert with category and subcategory and show the product links as well along with pros and cons, respond in markdown\"},\n",
+ " {\"role\": \"user\", \"content\": tech_product(products)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b916db7a-81a4-41d9-87c2-a2346fd874d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages_for(\"phone_gaming_40000\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b4bb3f1-95de-4eb5-afe1-068744f93301",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_top_products(category_subcategory):\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages= messages_for(category_subcategory)\n",
+ " )\n",
+ " return response.choices[0].message.content \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9272942-acfe-4fca-bd0a-3435c1ee6691",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_top_products('phone_gaming_30000')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c2b3b9a-aceb-4f00-8c8d-8f6837ab94fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_markdown(category_subcategory_budget):\n",
+ " output = get_top_products(category_subcategory_budget)\n",
+ " display(Markdown(output))\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6c135dd7-4ed4-48ee-ba3f-9b4ca1c32149",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_markdown('Console_gaming_100000')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0ba06c55-7ef9-47eb-aeaf-3c4a7b29bccc",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/.gitignore b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/.gitignore
new file mode 100644
index 0000000..290698f
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/.gitignore
@@ -0,0 +1,210 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+
+.*-env
\ No newline at end of file
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai-brochure-creator.py b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai-brochure-creator.py
new file mode 100644
index 0000000..79f3246
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai-brochure-creator.py
@@ -0,0 +1,207 @@
+from ai_core import AICore
+from ai_brochure_config import AIBrochureConfig
+from extractor_of_relevant_links import ExtractorOfRelevantLinks
+from website import Website
+from openai.types.responses import Response
+from rich.console import Console
+from rich.markdown import Markdown
+from requests import Session
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from json import loads
+
class BrochureCreator(AICore[str]):
    """
    Builds a short Markdown brochure for a company or individual by:
    - extracting relevant links from the website,
    - inferring the entity name and status (company vs. individual),
    - and prompting the model using the collected page content.
    """

    @property
    def _website(self) -> Website:
        """Return the main Website instance to analyze."""
        return self.__website

    @property
    def _extractor(self) -> ExtractorOfRelevantLinks:
        """Return the helper responsible for extracting relevant links."""
        return self.__extractor

    def __init__(self, config: AIBrochureConfig, website: Website) -> None:
        """
        Initialize the brochure creator with configuration and target website.

        Parameters:
            config: AI and runtime configuration.
            website: The root website to analyze and summarize.
        """
        system_behavior: str = ("You are an assistant that analyzes the contents of several relevant pages from a company website "
                                "and creates a short brochure about the company for prospective customers, investors and recruits. "
                                "Include details of company culture, customers and careers/jobs if information is available. ")
        super().__init__(config, system_behavior)
        self.__website: Website = website
        self.__extractor: ExtractorOfRelevantLinks = ExtractorOfRelevantLinks(config, website)

    def create_brochure(self) -> str:
        """
        Create a short Markdown brochure based on the website's content.

        Returns:
            A Markdown string with the brochure, or a fallback message if no
            relevant pages were found.
        """
        relevant_pages: list[dict[str, str | Website]] = self._get_relevant_pages()
        if not relevant_pages:
            return "No relevant pages found to create a brochure."

        brochure_prompt_part: str = self._form_brochure_prompt(relevant_pages)
        inferred_company_name, inferred_status = self._infer_entity(brochure_prompt_part)

        full_brochure_prompt: str = self._form_full_prompt(inferred_company_name, inferred_status)
        return self.ask(full_brochure_prompt)

    def _get_relevant_pages(self) -> list[dict[str, str | Website]]:
        """
        Resolve relevant links into Website objects using a shared session and
        a small thread pool.

        NOTE(review): requests.Session is not documented as thread-safe; it is
        shared across 4 workers here for connection pooling. Only GETs are
        issued, but consider one session per worker if issues appear — confirm.
        """
        relevant_pages: list[dict[str, str | Website]] = []
        relevant_links: list[dict[str, str]] = self._extractor.extract_relevant_links()["links"]
        # Limit the number of pages to fetch to keep latency and token usage reasonable.
        MAX_PAGES: int = 6
        links_subset = relevant_links[:MAX_PAGES]

        def build_page(item: dict[str, str], session: Session) -> dict[str, str | Website] | None:
            # Best-effort: a page that fails validation or fetching is skipped.
            try:
                url = str(item["url"])
                page_type = str(item["type"])
                return {"type": page_type, "page": Website(url, session=session)}
            except Exception:
                return None

        with Session() as session, ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(build_page, link, session) for link in links_subset]
            for fut in as_completed(futures):
                res = fut.result()
                if res:
                    relevant_pages.append(res)

        return relevant_pages

    def _truncate_text(self, text: str, limit: int) -> str:
        """
        Truncate text to at most 'limit' characters to reduce tokens and latency.
        """
        if len(text) <= limit:
            return text
        return text[: max(0, limit - 20)] + "... [truncated]"

    def _form_brochure_prompt(self, relevant_pages: list[dict[str, str | Website]]) -> str:
        """
        Assemble a prompt that includes the main page and relevant pages'
        titles and text, each section wrapped in triple-quote delimiters.

        Parameters:
            relevant_pages: List of page descriptors returned by _get_relevant_pages.

        Returns:
            A prompt string containing quoted sections per page.
        """
        QUOTE_DELIMITER: str = "\n\"\"\"\n"
        MAX_MAIN_CHARS = 6000
        MAX_PAGE_CHARS = 3000
        prompt: str = (
            f"Main page:{QUOTE_DELIMITER}"
            f"Title: {self._website.title}\n"
            f"Text:\n{self._truncate_text(self._website.text, MAX_MAIN_CHARS)}{QUOTE_DELIMITER}\n"
        )

        for page in relevant_pages:
            # Skip entries whose fetch failed; their text holds error details.
            if isinstance(page['page'], Website) and not page['page'].fetch_failed:
                prompt += (
                    f"{page['type']}:{QUOTE_DELIMITER}"
                    f"Title: {page['page'].title}\n"
                    f"Text:\n{self._truncate_text(page['page'].text, MAX_PAGE_CHARS)}{QUOTE_DELIMITER}\n"
                )

        return prompt

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """
        Remove a surrounding Markdown code fence (``` or ```json) if present.

        Models frequently wrap JSON answers in fences, which breaks json.loads.
        """
        stripped = raw.strip()
        if stripped.startswith("```"):
            first_newline = stripped.find("\n")
            # Drop the opening fence line (which may carry a language tag).
            stripped = stripped[first_newline + 1:] if first_newline != -1 else stripped.lstrip("`")
            stripped = stripped.rstrip()
            if stripped.endswith("```"):
                stripped = stripped[:-3].rstrip()
        return stripped

    def _infer_entity(self, brochure_prompt_part: str) -> tuple[str, str]:
        """
        Infer both the entity name and status in a single model call to reduce latency.

        Returns:
            (name, status) where status is 'company' or 'individual'.
        """
        prompt = (
            "From the following website excerpts, infer the entity name and whether it is a company or an individual. "
            "Respond strictly as JSON with keys 'name' and 'status' (status must be 'company' or 'individual').\n"
            f"{brochure_prompt_part}"
        )
        raw = self.ask(prompt)
        try:
            # FIX: strip a possible Markdown code fence before parsing. The
            # original passed raw output to loads(), so any fenced JSON answer
            # fell into the fallback and used the whole output as the name.
            data: dict[str, str] = loads(self._strip_code_fence(raw))
            name: str = str(data.get("name", "")).strip() or "Unknown"
            status: str = str(data.get("status", "")).strip().lower()
            if status not in ("company", "individual"):
                status = "company"
            return name, status
        except Exception:
            # Fallback: use entire output as name, assume company.
            return raw.strip() or "Unknown", "company"

    def _form_full_prompt(self, inferred_company_name: str, inferred_status: str) -> str:
        """
        Build the final brochure-generation prompt using the inferred entity and prior history.

        Parameters:
            inferred_company_name: The inferred entity name.
            inferred_status: Either 'company' or 'individual'.

        Returns:
            A final prompt instructing the model to produce a Markdown brochure.
        """
        full_prompt: str = (f"You are looking at a {inferred_status} called {inferred_company_name}, to whom website {self._website.website_url} belongs.\n"
                            f"Build a short brochure about the {inferred_status}. Use the information from the website that is already stored in the history.\n"
                            "Your response must be in a Markdown format.")
        return full_prompt

    def ask(self, question: str) -> str:
        """
        Send a question to the model, update chat history, and return the text output.

        Parameters:
            question: The user prompt.

        Returns:
            The model output text.
        """
        self.history_manager.add_user_message(question)
        response: Response = self._ai_api.responses.create(
            model=self.config.model_name,
            instructions=self.history_manager.system_behavior,
            input=self.history_manager.chat_history,
            reasoning={ "effort": "low" }
        )
        self.history_manager.add_assistant_message(response)
        return response.output_text
+
# Shared rich console used by all rendering helpers in this module.
console: Console = Console()

def display_markdown(content: str) -> None:
    """
    Render a Markdown string to the terminal via the shared rich console.
    """
    rendered = Markdown(content)
    console.print(rendered)
+
def show_summary(summary: str) -> None:
    """
    Display a Markdown summary, or a fallback notice when none was produced.
    """
    if not summary:
        console.print("No summary found.")
        return
    display_markdown(summary)
+
if __name__ == "__main__":
    import os
    import sys

    # BUG FIX: the original passed an empty URL (`Website("")`), which
    # Website._set_website_url always rejects with
    # ValueError("Website URL must be provided"), so the script could never
    # run. Take the target URL from the command line or the WEBSITE_URL
    # environment variable instead.
    target_url: str = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("WEBSITE_URL", "")
    if not target_url:
        console.print("Usage: python ai-brochure-creator.py <website-url> (or set WEBSITE_URL)")
        sys.exit(2)

    website: Website = Website(target_url)
    brochure_creator: BrochureCreator = BrochureCreator(AIBrochureConfig(), website)
    brochure: str = brochure_creator.create_brochure()
    display_markdown(brochure)
\ No newline at end of file
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_brochure_config.py b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_brochure_config.py
new file mode 100644
index 0000000..9a0e2bd
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_brochure_config.py
@@ -0,0 +1,59 @@
+import os
+from dotenv import load_dotenv
+
class AIBrochureConfig:
    """
    Runtime configuration backed by environment variables (loaded from .env).
    """

    def __init__(self) -> None:
        # Load variables from a local .env file into the process environment.
        load_dotenv(dotenv_path=".env")
        # Empty string means "not read yet"; values are fetched lazily.
        self.__openai_api_key: str = ""
        self.__model_name: str = ""

    def __get_config_value(self, key: str):
        """
        Read an environment variable, raising ValueError when the key is
        empty or the variable is absent/blank.
        """
        if not key:
            raise ValueError("Key must be provided")

        value: str | None = os.getenv(key)
        if not value:
            raise ValueError(f"Environment variable '{key}' not found")

        return value

    def _get_str(self, key: str) -> str:
        """Return the string value of an environment variable."""
        return self.__get_config_value(key)

    def _get_int(self, key: str) -> int:
        """Return the integer value of an environment variable."""
        raw = self.__get_config_value(key)
        try:
            return int(raw)
        except ValueError:
            raise ValueError(f"Environment variable '{key}' must be an integer")

    @property
    def openai_api_key(self) -> str:
        """The OpenAI API key (OPENAI_API_KEY), read lazily and cached."""
        if not self.__openai_api_key:
            self.__openai_api_key = self._get_str("OPENAI_API_KEY")
        return self.__openai_api_key

    @property
    def model_name(self) -> str:
        """The model name (MODEL_NAME), read lazily and cached."""
        if not self.__model_name:
            self.__model_name = self._get_str("MODEL_NAME")
        return self.__model_name
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_core.py b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_core.py
new file mode 100644
index 0000000..e517f9d
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai_core.py
@@ -0,0 +1,181 @@
+import openai
+from abc import ABC, abstractmethod
+from ai_brochure_config import AIBrochureConfig
+from typing import Any, cast, Generic, TypeVar
+from openai.types.responses import ResponseInputItemParam, Response, ResponseOutputMessage
+
+TAiResponse = TypeVar('TAiResponse', default=Any)
+
class HistoryManager:
    """
    Tracks the system instructions and the running chat transcript that are
    sent to the model with every request.
    """

    def __init__(self, system_behavior: str) -> None:
        """
        Parameters:
            system_behavior: The system instruction string for the conversation.
        """
        self.__system_behavior: str = system_behavior
        self.__chat_history: list[ResponseInputItemParam] = []

    @property
    def system_behavior(self) -> str:
        """The system instruction string used for this conversation."""
        return self.__system_behavior

    @property
    def chat_history(self) -> list[ResponseInputItemParam]:
        """The accumulated conversation as a list of response input items."""
        return self.__chat_history

    def add_user_message(self, message: str) -> None:
        """
        Record a user turn in the transcript.

        Parameters:
            message: The user text to add.
        """
        user_item = {
            "role": "user",
            "content": [{"type": "input_text", "text": message}],
        }
        self.__chat_history.append(user_item)

    def add_assistant_message(self, output_message: Response) -> None:
        """
        Record every output item of a model response in the transcript.

        Parameters:
            output_message: The model response to convert and store.
        """
        for item in output_message.output:
            # Re-shape the Pydantic output model into an input-item dict.
            dumped = item.model_dump(exclude_unset=True)
            self.__chat_history.append(cast(ResponseInputItemParam, dumped))
+
+
class AICore(ABC, Generic[TAiResponse]):
    """
    Abstract base for model-backed helpers: owns the configuration, a lazily
    created OpenAI client, and the conversation history.
    """

    def __init__(self, config: AIBrochureConfig, system_behavior: str) -> None:
        """
        Parameters:
            config (AIBrochureConfig): The configuration object for the AI core.
            system_behavior (str): The system instruction for the conversation.
        """
        self.__config: AIBrochureConfig = config
        self.__history_manager: HistoryManager = HistoryManager(system_behavior)
        # Client is built on first use of _ai_api, not here.
        self.__ai_api: openai.OpenAI | None = None

        if __debug__:
            # Sanity check: confirm the (name-mangled) attributes exist.
            assert hasattr(self, "_AICore__config")
            assert hasattr(self, "_AICore__history_manager")
            assert hasattr(self, "_AICore__ai_api")

    @property
    def config(self) -> AIBrochureConfig:
        """
        The configuration in use.

        The internal reference is returned, so mutating the result affects
        this instance; prefer the setter to replace the configuration.
        """
        return self.__config

    @config.setter
    def config(self, config: AIBrochureConfig | None) -> None:
        """
        Replace the configuration. Passing None resets the instance to a
        freshly constructed default AIBrochureConfig.
        """
        self.__config = AIBrochureConfig() if config is None else config

    @property
    def _ai_api(self) -> openai.OpenAI:
        """
        The OpenAI client, created on first access from config.openai_api_key
        and cached for subsequent calls.

        Raises:
            ValueError: If no configuration is set when the client is needed.

        Notes:
            Not thread-safe: concurrent first accesses may each construct a
            client instance.
        """
        if self.__ai_api is None:
            if self.config is None:
                raise ValueError("Configuration must be set before accessing AI API")
            self.__ai_api = openai.OpenAI(api_key=self.config.openai_api_key)
        return self.__ai_api

    @property
    def history_manager(self) -> HistoryManager:
        """
        The HistoryManager tracking chat history and system behavior.
        Always a valid instance, never None.
        """
        return self.__history_manager

    @abstractmethod
    def ask(self, question: str) -> TAiResponse:
        """
        Ask a question to the AI model.

        Parameters:
            question: The question to ask.

        Returns:
            TAiResponse: The model's response type defined by the subclass.
        """
        ...
\ No newline at end of file
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/extractor_of_relevant_links.py b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/extractor_of_relevant_links.py
new file mode 100644
index 0000000..e94fa38
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/extractor_of_relevant_links.py
@@ -0,0 +1,91 @@
+from ai_brochure_config import AIBrochureConfig
+from website import Website
+from ai_core import AICore
+from openai.types.responses import Response
+from json import loads
+
+RelevantLinksDict = dict[str, list[dict[str, str]]]
+
class ExtractorOfRelevantLinks(AICore[RelevantLinksDict]):
    """
    Asks the model to pick brochure-relevant links (About/Company/Careers
    pages) out of all links discovered on a website.
    """

    @property
    def website(self) -> Website:
        """Return the root Website whose links are being analyzed."""
        return self.__website

    def __init__(self, config: AIBrochureConfig, website: Website) -> None:
        """
        Initialize the extractor with configuration and target website.

        Parameters:
            config: AI and runtime configuration.
            website: The Website from which links were collected.
        """
        # FIX: the original concatenated these string literals without
        # separating spaces, producing "...materials.You are going..." etc.
        system_behavior: str = ("You are an expert in creation of online advertisement materials. "
                                "You are going to be provided with a list of links found on a website. "
                                "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
                                "such as links to an About page or a Company page or Careers/Jobs pages.\n"
                                "You should respond in JSON as in this example:")
        system_behavior += """
        {
            "links": [
                {"type": "about page", "url": "https://www.example.com/about"},
                {"type": "company page", "url": "https://www.another_example.net/company"},
                {"type": "careers page", "url": "https://ex.one_more_example.org/careers"}
            ]
        }
        """
        super().__init__(config, system_behavior)
        self.__website: Website = website

    def get_links_user_prompt(self) -> str:
        """
        Build a user prompt listing discovered links and instructions for
        relevance filtering.

        Returns:
            A string to send to the model listing links and guidance.
        """
        # FIX: added the missing space after "...brochure about company."
        starter_part: str = (f"Here is a list of links found on the website of {self.website.website_url} - "
                             "please decide which of these links are relevant web links for a brochure about company. "
                             "Respond with full HTTPS URLs. Avoid including Terms of Service, Privacy, email links.\n"
                             "Links (some might be relative links):\n")

        links_part: str = "\n".join(f"- {link}" for link in self.website.links_on_page) if self.website.links_on_page else "No links found."

        return starter_part + links_part

    def extract_relevant_links(self) -> RelevantLinksDict:
        """
        Request the model to select relevant links for brochure creation.

        Returns:
            A dictionary with a 'links' array containing objects with
            'type' and 'url'.
        """
        user_prompt = self.get_links_user_prompt()
        return self.ask(user_prompt)

    def ask(self, question: str) -> RelevantLinksDict:
        """
        Send a question to the model and parse the JSON response.

        Parameters:
            question: The prompt to submit.

        Returns:
            RelevantLinksDict: Parsed JSON containing selected links.

        Raises:
            json.JSONDecodeError: If the output is not valid JSON even after
                stripping a surrounding Markdown code fence.
        """
        self.history_manager.add_user_message(question)

        response: Response = self._ai_api.responses.create(
            model=self.config.model_name,
            instructions=self.history_manager.system_behavior,
            reasoning={ "effort": "low" },
            input=self.history_manager.chat_history
        )

        self.history_manager.add_assistant_message(response)

        # FIX: models frequently wrap JSON in ```json fences; strip them
        # before parsing instead of failing with JSONDecodeError.
        raw: str = response.output_text.strip()
        if raw.startswith("```"):
            raw = raw.split("\n", 1)[1] if "\n" in raw else raw.lstrip("`")
            raw = raw.rstrip()
            if raw.endswith("```"):
                raw = raw[:-3].rstrip()
        return loads(raw)
\ No newline at end of file
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/requirements.txt b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/requirements.txt
new file mode 100644
index 0000000..9747210
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/requirements.txt
@@ -0,0 +1,5 @@
+python-dotenv
+openai
+beautifulsoup4
+requests
+rich
\ No newline at end of file
diff --git a/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/website.py b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/website.py
new file mode 100644
index 0000000..ac9bb9d
--- /dev/null
+++ b/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/website.py
@@ -0,0 +1,286 @@
+from ipaddress import ip_address, IPv4Address, IPv6Address
+from urllib.parse import ParseResult, urlparse
+from bs4 import BeautifulSoup, Tag
+from requests import get, RequestException, Session
+
class Extractor:
    """
    Extracts and processes content from HTML response text using BeautifulSoup.
    """
    __soup: BeautifulSoup

    def __init__(self, response_text_content: str) -> None:
        """
        Initializes the Extractor with HTML response text.

        Parameters:
            response_text_content (str): The HTML response text to be processed.
        """
        self.__soup = BeautifulSoup(response_text_content, "html.parser")
        # Empty string means "not computed yet" for the lazy caches below.
        self.__extracted_title: str = ""
        self.__extracted_text: str = ""
        # FIX: collect hrefs eagerly at construction time. get_text() calls
        # decompose() on forms, buttons, figures, etc. in place, so the
        # original lazy extraction silently dropped any links nested inside
        # those tags whenever the text was read before the links.
        self.__extracted_links_on_page: list[str] = [
            str(a.get("href"))
            for a in self.__soup.find_all('a', href=True)
            if isinstance(a, Tag)
        ]

    @property
    def _soup(self) -> BeautifulSoup:
        """The BeautifulSoup object for the HTML content."""
        return self.__soup

    @property
    def extracted_title(self) -> str:
        """The page title, computed once and cached."""
        if not self.__extracted_title:
            self.__extracted_title = self.get_title()
        return self.__extracted_title

    @property
    def extracted_text(self) -> str:
        """The cleaned main text content, computed once and cached."""
        if not self.__extracted_text:
            self.__extracted_text = self.get_text()
        return self.__extracted_text

    @property
    def extracted_links_on_page(self) -> list[str]:
        """
        All href values found on the page.

        Notes:
            - Only anchor tags with an href are included.
            - Values are returned as-is (may be relative or absolute).
        """
        return self.__extracted_links_on_page

    def get_title(self) -> str:
        """Extract the title from the HTML content ('No title' when absent)."""
        return self._soup.title.get_text() if self._soup.title is not None else "No title"

    def get_text(self) -> str:
        """
        Extract and clean the main text content, removing irrelevant tags.

        NOTE: mutates the underlying soup by decomposing the removed tags.
        """
        for irrelevant in self._soup.find_all(["script", "style", "img", "figure", "video", "audio", "button", "svg", "canvas", "input", "form", "meta"]):
            irrelevant.decompose()
        raw_text: str = self._soup.get_text(separator="\n")
        cleaned_text: str = " ".join(raw_text.split())
        return cleaned_text if cleaned_text else "No content"
+
class Website:
    """
    A class to represent a website: validates the URL, fetches the page, and
    exposes its title, text content, and the links found on it.
    """

    # FIX: ".io" added so the default accept-set matches what
    # __is_allowed_domain previously hardcoded (see that method).
    __DEFAULT_ALLOWED_DOMAINS: list[str] = [".com", ".org", ".net", ".io"]

    # Class-level fallbacks; instances overwrite these during fetch.
    __title: str = ""
    __website_url: str = ""
    __text: str = ""
    __allowed_domains: list[str] = []
    __links_on_page: list[str] | None = None

    @property
    def title(self) -> str:
        """The website title ('Error'/'Not Found' when the fetch failed)."""
        return self.__title

    @property
    def text(self) -> str:
        """The main text content (error details when the fetch failed)."""
        return self.__text

    @property
    def website_url(self) -> str:
        """The validated URL of the website."""
        return self.__website_url

    @property
    def links_on_page(self) -> list[str] | None:
        """The links extracted from the page, or None when none were fetched."""
        return self.__links_on_page

    @property
    def _allowed_domains(self) -> list[str]:
        """The list of allowed domain suffixes (each starting with a dot)."""
        return self.__allowed_domains

    @_allowed_domains.setter
    def _allowed_domains(self, value: list[str] | str) -> None:
        """
        Set the allowed domain suffixes. Accepts a list or a comma-separated
        string; empty entries are dropped and a leading dot is enforced.
        """
        if isinstance(value, str):
            value = [
                item.strip() if item.strip().startswith(".") else f".{item.strip()}"
                for item in value.split(",")
                if item.strip()
            ]
        else:
            value = [
                item if item.startswith(".") else f".{item}"
                for item in value
                if item
            ]
        self.__allowed_domains = value

    @property
    def fetch_failed(self) -> bool:
        """Whether fetching the website data failed."""
        return self.__fetch_failed

    def _set_website_url(self, value: str) -> None:
        """
        Protected: validate and store the website URL, then fetch its data.
        Use this from inside the class to initialize or change the URL.

        Raises:
            ValueError: If the URL is empty or fails validation.
        """
        if not value:
            raise ValueError("Website URL must be provided")

        parsed_url: ParseResult = urlparse(value)
        self._validate(parsed_url)

        self.__website_url = value
        self.__fetch_website_data()

    def _validate(self, parsed_url: ParseResult) -> None:
        """
        Validate the parsed URL: scheme, hostname, not local, allowed domain.

        Parameters:
            parsed_url: The parsed URL to validate.

        Raises:
            ValueError: If the URL is missing parts, uses an invalid scheme,
                points to a local/private address, or is not in allowed domains.
        """
        if not parsed_url.netloc or parsed_url.scheme not in ("http", "https"):
            raise ValueError("Website URL must be a valid URL")

        if not parsed_url.hostname:
            raise ValueError("Website URL must contain a valid hostname")

        if self.__is_local_address(parsed_url.hostname):
            raise ValueError("Website URL must not be a local address")

        if not self.__is_allowed_domain(parsed_url.hostname):
            raise ValueError("Website URL must be an allowed domain")

    def __is_local_address(self, hostname: str) -> bool:
        """
        Check if the given hostname is a local address.

        Parameters:
            hostname (str): The hostname to check.

        Returns:
            bool: True if the hostname is a local address, False otherwise.
        """
        if hostname in ("localhost", "127.0.0.1", "::1"):
            return True

        try:
            ip: IPv4Address | IPv6Address = ip_address(hostname)
        except ValueError:
            # Not an IP literal; regular hostnames are not "local" here.
            return False

        return ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_reserved

    def __is_allowed_domain(self, hostname: str) -> bool:
        """
        Check if the given hostname ends with an allowed domain suffix.

        FIX: the original compared against a hardcoded list
        ([".com", ".org", ".net", ".io"]) and ignored self._allowed_domains,
        which made the allowed_domains constructor argument dead. The default
        set now includes ".io" so default-configured instances accept the
        same hostnames as before.

        Parameters:
            hostname (str): The hostname to check.

        Returns:
            bool: True if the hostname is an allowed domain, False otherwise.
        """
        return any(hostname.endswith(domain) for domain in self._allowed_domains)

    def __fetch_website_data(self) -> None:
        """
        Fetch website content and populate title, text, and links.

        Side effects:
            - Sets internal state: __title, __text, __links_on_page, __fetch_failed.
            - Performs an HTTP GET with a browser-like User-Agent.
        """
        try:
            # Reuse the injected session (connection pooling) when available.
            get_fn = self.__session.get if self.__session else get
            response = get_fn(
                self.website_url,
                timeout=10,
                verify=True,
                headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"}
            )
        except RequestException as e:
            self.__title = "Error"
            self.__text = str(e)
            self.__fetch_failed = True
            return

        if response.ok:
            extractor: Extractor = Extractor(response.text)
            self.__title = extractor.extracted_title
            self.__text = extractor.extracted_text
            self.__links_on_page = extractor.extracted_links_on_page
        else:
            if response.status_code == 404:
                self.__title = "Not Found"
                self.__text = "The requested page was not found (404)."
            else:
                self.__title = "Error"
                self.__text = f"Error: {response.status_code} - {response.reason}"
            self.__fetch_failed = True

    def __init__(self, website_url: str, allowed_domains: list[str] | str | None = None, session: Session | None = None) -> None:
        """
        Initializes the Website object and fetches its data.

        Parameters:
            website_url (str): The URL of the website to fetch.
            allowed_domains (list[str] | str, optional): Allowed domain
                suffixes. A string is treated as a comma-separated list of
                suffixes (e.g., ".com,.org,.net").
            session (requests.Session | None, optional): Reused HTTP session
                for connection pooling.
        """
        self.__fetch_failed: bool = False
        self.__session: Session | None = session
        if allowed_domains is None:
            self._allowed_domains = self.__DEFAULT_ALLOWED_DOMAINS.copy()
        else:
            self._allowed_domains = allowed_domains
        # Use protected setter internally so the public API exposes only the getter.
        self._set_website_url(website_url)

    def __str__(self) -> str:
        """Returns a short human-readable representation of the Website."""
        return f"Website(title={self.title}, url={self.website_url})"
\ No newline at end of file
diff --git a/week1/community-contributions/brochure-builder-with-multishot-prompting.ipynb b/week1/community-contributions/brochure-builder-with-multishot-prompting.ipynb
new file mode 100644
index 0000000..3427a82
--- /dev/null
+++ b/week1/community-contributions/brochure-builder-with-multishot-prompting.ipynb
@@ -0,0 +1,402 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9905f163-759f-474b-8f7a-7d14da0df44d",
+ "metadata": {},
+ "source": [
+ "### BUSINESS CHALLENGE: Using Multi-shot Prompting\n",
+ "#### Day 5\n",
+ "\n",
+ "Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.\n",
+ "\n",
+ "We will be provided a company name and their primary website."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a0895f24-65ff-4624-8ae0-15d2d400d8f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "import json\n",
+ "from typing import List\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7794aa70-5962-4669-b86f-b53639f4f9ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize and constants\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
+ " print(\"API key looks good so far\")\n",
+ "else:\n",
+ " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
+ " \n",
+ "MODEL = 'gpt-4o-mini'\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "63bf8631-2746-4255-bec1-522855d3e812",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ " \"\"\"\n",
+ " A utility class to represent a Website that we have scraped, now with links\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " self.body = response.content\n",
+ " soup = BeautifulSoup(self.body, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " if soup.body:\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+ " else:\n",
+ " self.text = \"\"\n",
+ " links = [link.get('href') for link in soup.find_all('a')]\n",
+ " self.links = [link for link in links if link]\n",
+ "\n",
+ " def get_contents(self):\n",
+ " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e7bb527-e769-4245-bb91-ae65e64593ff",
+ "metadata": {},
+ "source": [
+ "## First step: Have GPT-4o-mini figure out which links are relevant\n",
+ "\n",
+ "### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1ce303ae-b967-4261-aadc-02dafa54db4a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
+ "You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
+ "such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
+ "link_system_prompt += \"You should respond in JSON as in this example:\"\n",
+ "link_system_prompt += \"\"\"\n",
+ "{\n",
+ " \"links\": [\n",
+ " {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
+ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
+ " ]\n",
+ "}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d24a4c0c-a1d1-4897-b2a7-4128d25c2e08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_links_user_prompt(website):\n",
+ " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
+ " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
+ "Do not include Terms of Service, Privacy, email links.\\n\"\n",
+ " user_prompt += \"Links (some might be relative links):\\n\"\n",
+ " user_prompt += \"\\n\".join(website.links)\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8103fc11-5bc0-41c4-8c97-502c9e96429c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_links(url): # 1st inference\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": link_system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
+ " ],\n",
+ " response_format={\"type\": \"json_object\"}\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return json.loads(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dc84a695-515d-4292-9a95-818f4fe3d20e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "huggingface = Website(\"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "91896908-1632-41fc-9b8b-39a7638d8dd1",
+ "metadata": {},
+ "source": [
+ "## Second step: make the brochure!\n",
+ "\n",
+ "Assemble all the details into another prompt to GPT4-o"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ab7c54e3-e654-4b1f-8671-09194b628aa0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_all_details(url): # 1st inference wrapper\n",
+ " result = \"Landing page:\\n\"\n",
+ " result += Website(url).get_contents()\n",
+ " links = get_links(url) # inference\n",
+ " # print(\"Found links:\", links)\n",
+ " for link in links[\"links\"]:\n",
+ " result += f\"\\n\\n{link['type']}\\n\"\n",
+ " result += Website(link[\"url\"]).get_contents()\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea9f54d1-a248-4c56-a1de-6633193de5bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
+ "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
+ "Include details of company culture, customers and careers/jobs if you have the information.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13412c85-badd-4d79-a5ac-8283e4bb832f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_brochure_user_prompt(company_name, url):\n",
+ " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
+ "    user_prompt += \"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
+ " user_prompt += get_all_details(url) # inference wrapper\n",
+ " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "addc0047-ea73-4748-abc3-747ff343c134",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_brochure(company_name, url): # 2nd inference\n",
+ " response = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
+ " ],\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82a3b61a-da26-4265-840a-0a93f81cd048",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "brochure_english = create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d165e3f-8fe2-4712-b098-d34d9fabe583",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display(Markdown(brochure_english))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "107a2100-3f7d-4f16-8ba7-b5da602393c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_brochure(company_name, url):\n",
+ " stream = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " response = \"\"\n",
+ " display_handle = display(Markdown(\"\"), display_id=True)\n",
+ " for chunk in stream:\n",
+ " response += chunk.choices[0].delta.content or ''\n",
+ " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
+ " update_display(Markdown(response), display_id=display_handle.display_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26cbe9b5-3603-49a1-a676-75c7ddaacdb8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c10d8189-7f79-4991-abc4-0764369b7d64",
+ "metadata": {},
+ "source": [
+ "### Third step: Translate the entire brochure to Spanish"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "666817eb-1e8b-4fee-bbab-c0dbfe2ea7c0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of a brochure \\\n",
+ "and translates to Spanish. Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c48adb12-bc3c-48f9-ab38-b7ca895195f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def translate_user_prompt(company_name, url):\n",
+ "    user_prompt = \"Please translate the following brochure content to Spanish\\n\"\n",
+ " user_prompt += create_brochure(company_name, url) # inference wrapper\n",
+ " # user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b92b61ac-3be3-4e84-9000-ec8233697b81",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "translate_user_prompt(\"HuggingFace\", \"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6bfd04f4-4381-4730-ac5d-c9fa02f906df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def translate_brochure(): # 3rd inference\n",
+ " stream = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": translate_user_prompt(\"HuggingFace\", \"https://huggingface.co\")}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " response = \"\"\n",
+ " display_handle = display(Markdown(\"\"), display_id=True)\n",
+ " for chunk in stream:\n",
+ " response += chunk.choices[0].delta.content or ''\n",
+ " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
+ " update_display(Markdown(response), display_id=display_handle.display_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bb78ed28-9ecd-4c08-ae96-d7473cbc97dd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "translate_brochure()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day-1-bank-account-summarization.ipynb b/week1/community-contributions/day-1-bank-account-summarization.ipynb
new file mode 100644
index 0000000..bae0cfe
--- /dev/null
+++ b/week1/community-contributions/day-1-bank-account-summarization.ipynb
@@ -0,0 +1,270 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f60dab2a-a377-4761-8be3-69a3b8124ca6",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import pdfplumber\n",
+ "import re\n",
+ "import json\n",
+ "\n",
+ "def parse_transaction_line(line):\n",
+ "    # More specific pattern that captures each component\n",
+ " pattern = r'^(\\d{2}/\\d{2})\\s+(.+?)\\s+(-?[\\d,]+\\.\\d{2})\\s+(-?[\\d,]+\\.\\d{2})$'\n",
+ " match = re.match(pattern, line.strip())\n",
+ " \n",
+ " if match:\n",
+ " date, description, amount, balance = match.groups()\n",
+ " return {\n",
+ " 'date': date,\n",
+ " 'description': description.strip(),\n",
+ " 'amount': amount,\n",
+ " 'balance': balance\n",
+ " }\n",
+ " return None\n",
+ "\n",
+ "def parse_Credit_Card_transaction_line(line):\n",
+ "    # More specific pattern that captures each component\n",
+ " pattern = r'^(\\d{2}/\\d{2})\\s+(.+?)\\s+(-?[\\d,]+\\.\\d{2})$'\n",
+ " match = re.match(pattern, line.strip())\n",
+ " \n",
+ " if match:\n",
+ " date, description, amount = match.groups()\n",
+ " return {\n",
+ " 'date': date,\n",
+ " 'description': description.strip(),\n",
+ " 'amount': amount\n",
+ " }\n",
+ " return None\n",
+ "\n",
+ "# \n",
+ "def extract_transactions_CA_from_pdf(pdf_path):\n",
+ " transactions = []\n",
+ " \n",
+ " with pdfplumber.open(pdf_path) as pdf:\n",
+ " for page in pdf.pages:\n",
+ " text = page.extract_text()\n",
+ " for line in text.split(\"\\n\"):\n",
+ " parsed = parse_transaction_line(line)\n",
+ " if parsed:\n",
+ " transactions.append(parsed)\n",
+ " return transactions\n",
+ "\n",
+ "def extract_transactions_CreditCard_from_pdf(pdf_path):\n",
+ " transactions = []\n",
+ " \n",
+ " with pdfplumber.open(pdf_path) as pdf:\n",
+ " for page in pdf.pages:\n",
+ " text = page.extract_text()\n",
+ " for line in text.split(\"\\n\"):\n",
+ " parsed = parse_Credit_Card_transaction_line(line)\n",
+ " if parsed:\n",
+ " transactions.append(parsed)\n",
+ " return transactions\n",
+ "# print(transactions, len(transactions)) # check first 10 extracted lines\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82c34eac-fc30-41d6-8325-77efc48d0dd8",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import os\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "769ee512-75f5-480a-9407-f9c4cd46b679",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# ---------- STEP 3: Build prompts ----------\n",
+ "\n",
+ "def build_prompts(transactions):\n",
+ " system_prompt = \"\"\"\n",
+ "You are a personal financial assistant.\n",
+ "Your job is to analyze bank transactions, categorize each expense into categories such as:\n",
+ "Food, Clothing, Rent, Utilities, Entertainment, Travel, Health, Miscellaneous, and Others.\n",
+ "\n",
+ "Your responsibilities:\n",
+ "\n",
+ "Categorize all transactions and compute total spending per category.\n",
+ "\n",
+ "Identify the top 5 categories by total spending.\n",
+ "\n",
+ "Detect high-frequency purchases, even if individual amounts are small (e.g., $4 coffee bought 40 times).\n",
+ "\n",
+ "For these, group transactions by merchant/description and count frequency.\n",
+ "\n",
+ "Highlight the top 5 frequent purchases, with both frequency and total spend.\n",
+ "\n",
+ "Provide a practical summary of spending habits, covering both biggest expenses and frequent small purchases.\n",
+ "\n",
+ "Suggest 2–3 actionable recommendations to reduce spending, targeting both:\n",
+ "\n",
+ "Big categories (e.g., Rent, Travel, Entertainment).\n",
+ "\n",
+ "Small but frequent “habit expenses” (e.g., coffee, fast food, subscriptions).\n",
+ "\n",
+ "The output should be a valid JSON object with this structure:\n",
+ "{\n",
+ " \"summary\": {\n",
+ " \"Food\": ,\n",
+ " \"Clothing\": ,\n",
+ " \"Rent\": ,\n",
+ " \"Utilities\": ,\n",
+ " \"Entertainment\": ,\n",
+ " \"Travel\": ,\n",
+ " \"Health\": ,\n",
+ " \"Miscellaneous\": ,\n",
+ " \"Others\": \n",
+ " },\n",
+ " \"total_expenses\": ,\n",
+ " \"top_5_categories\": [ {\"category\": , \"amount\": } ],\n",
+ " \"top_5_frequent_purchases\": [ {\"item\": , \"count\": , \"total\": } ],\n",
+ " \"insights\": \"\",\n",
+ " \"recommendations\": [ \"\", \"\", \"\" ]\n",
+ "}\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ " user_prompt = \"Here are my bank account transactions for the past few months:\\n\\n\"\n",
+ " for txn in transactions:\n",
+ " user_prompt += f\"- Date: {txn['date']}, Description: {txn['description']}, Amount: {txn['amount']}\\n\"\n",
+ "\n",
+ " user_prompt += \"\"\"\n",
+ "Please analyze these transactions according to the instructions in the system prompt.\n",
+ "\"\"\"\n",
+ "\n",
+ " return system_prompt, user_prompt\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "307ca02b-2df6-4996-85e7-d073f74592f5",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# ---------- STEP 4: Call OpenAI ----------\n",
+ "def analyze_transactions(pdf_path):\n",
+ " transactions = extract_transactions_CreditCard_from_pdf(pdf_path)\n",
+ " system_prompt, user_prompt = build_prompts(transactions)\n",
+ "\n",
+ " client = OpenAI() # assumes OPENAI_API_KEY is set in env\n",
+ "\n",
+ " response = client.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ],\n",
+ " response_format={\"type\": \"json_object\"} # ensures valid JSON\n",
+ " )\n",
+ "\n",
+ " result = response.choices[0].message.content\n",
+ " return json.loads(result)\n",
+ "\n",
+ "# ---------- MAIN ----------\n",
+ "if __name__ == \"__main__\":\n",
+ " cc_pdf_file = \"cc_statement.pdf\"\n",
+ " # To Debug in case of failures\n",
+ "    # transactions = extract_transactions_CA_from_pdf(pdf_file)\n",
+ " # print(cc_transactions,len(cc_transactions))\n",
+ " # system_prompt, user_prompt = build_prompts(cc_transactions)\n",
+ " # print(system_prompt, user_prompt)\n",
+ "\n",
+ " # Analyse the function to create a smart alert\n",
+ " cc_transactions = extract_transactions_CreditCard_from_pdf(cc_pdf_file)\n",
+ " analysis = analyze_transactions(cc_pdf_file)\n",
+ " print(\"=========================================\")\n",
+ " print(\"=== Top 5 Spending Habits & Insights ====\")\n",
+ " print(\"=========================================\")\n",
+ " print(json.dumps(analysis, indent=2))\n",
+ " print(\"=========================================\")\n",
+ " print(\"=========================================\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "831922f4-5efd-4cba-9975-54767b65f6d6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb b/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb
new file mode 100644
index 0000000..e18c68f
--- /dev/null
+++ b/week1/community-contributions/day-1-thesis_pdf_summarizer.ipynb
@@ -0,0 +1,305 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "581151c0-941e-47b3-a3e0-2da65ba70087",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "47353a41-4b47-499e-9460-fd645345f591",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "API key found and looks good so far\n"
+ ]
+ }
+ ],
+ "source": [
+ "load_dotenv()\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "if not api_key:\n",
+ " print('No API key was found')\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"API key is found but is not in the proper format\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "dbfbb29a-3452-45a0-b9b3-4e329ac776fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "88ffe256-e46a-45e8-a616-0ac574aa7085",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"\"\"You are a research summarizer specialized in wireless communication systems and propagation modeling. Your task is to summarize a research thesis in no more than 1000 words. The summary must be clear, structured, and written in markdown format.\n",
+ "\n",
+ "The summary should include the following sections:\n",
+ "\n",
+ "1. **Title and Authors** — Provide the full title of the thesis and author name(s).\n",
+ "2. **Objective / Research Problem** — Clearly state the core research goal or question addressed in the thesis.\n",
+ "3. **Scientific and Regional Background** — Explain the technical context of radio wave propagation, and why studying it in the Horn of Africa region is important.\n",
+ "4. **Methodology** — Summarize the modeling techniques, data sources, simulation tools, frequency bands (e.g., microwave, millimeter), and measurement or evaluation methods used.\n",
+ "5. **Key Findings** — Highlight the quantitative and qualitative results, including differences between precipitation and clear-air conditions, and observed trends across geographic locations.\n",
+ "6. **Conclusion** — Describe the primary outcomes and how they advance understanding in wireless communications.\n",
+ "7. **Limitations** — Point out any constraints (e.g., lack of in-situ measurement, simulation assumptions).\n",
+ "8. **Future Work** — Suggest next steps for improving or extending this research.\n",
+ "9. **Real-World Applications** — Discuss how the models or findings could improve wireless network planning, 5G deployment, or link budgeting in East Africa and similar regions.\n",
+ "\n",
+ "Use academic language but keep it concise, clear, and structured for a technical reader. Output in markdown format only.\n",
+ "\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5f3f7b1a-865f-44cc-854d-9e9e7771eb82",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: ipywidgets in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (8.1.7)\n",
+ "Collecting pdfplumber\n",
+ " Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)\n",
+ "Requirement already satisfied: comm>=0.1.3 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
+ "Requirement already satisfied: ipython>=6.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (9.4.0)\n",
+ "Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
+ "Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (4.0.14)\n",
+ "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (3.0.15)\n",
+ "Collecting pdfminer.six==20250506 (from pdfplumber)\n",
+ " Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)\n",
+ "Requirement already satisfied: Pillow>=9.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfplumber) (11.3.0)\n",
+ "Collecting pypdfium2>=4.18.0 (from pdfplumber)\n",
+ " Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl.metadata (48 kB)\n",
+ "Requirement already satisfied: charset-normalizer>=2.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (3.4.3)\n",
+ "Requirement already satisfied: cryptography>=36.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (45.0.6)\n",
+ "Requirement already satisfied: cffi>=1.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (1.17.1)\n",
+ "Requirement already satisfied: pycparser in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cffi>=1.14->cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (2.22)\n",
+ "Requirement already satisfied: colorama in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
+ "Requirement already satisfied: decorator in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
+ "Requirement already satisfied: ipython-pygments-lexers in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
+ "Requirement already satisfied: jedi>=0.16 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
+ "Requirement already satisfied: matplotlib-inline in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n",
+ "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.51)\n",
+ "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
+ "Requirement already satisfied: stack_data in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
+ "Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.14.1)\n",
+ "Requirement already satisfied: wcwidth in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.13)\n",
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.4)\n",
+ "Requirement already satisfied: executing>=1.2.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.2.0)\n",
+ "Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)\n",
+ "Requirement already satisfied: pure_eval in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
+ "Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)\n",
+ "Downloading pdfminer_six-20250506-py3-none-any.whl (5.6 MB)\n",
+ " ---------------------------------------- 0.0/5.6 MB ? eta -:--:--\n",
+ " --------------------------------------- 5.5/5.6 MB 30.7 MB/s eta 0:00:01\n",
+ " ---------------------------------------- 5.6/5.6 MB 22.9 MB/s 0:00:00\n",
+ "Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl (2.9 MB)\n",
+ " ---------------------------------------- 0.0/2.9 MB ? eta -:--:--\n",
+ " ---------------------------------------- 2.9/2.9 MB 28.0 MB/s 0:00:00\n",
+ "Installing collected packages: pypdfium2, pdfminer.six, pdfplumber\n",
+ "\n",
+ " ---------------------------------------- 0/3 [pypdfium2]\n",
+ " ---------------------------------------- 0/3 [pypdfium2]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " ------------- -------------------------- 1/3 [pdfminer.six]\n",
+ " -------------------------- ------------- 2/3 [pdfplumber]\n",
+ " ---------------------------------------- 3/3 [pdfplumber]\n",
+ "\n",
+ "Successfully installed pdfminer.six-20250506 pdfplumber-0.11.7 pypdfium2-4.30.0\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install ipywidgets pdfplumber"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "06dcfc1d-b106-4b9a-9346-6dd6af4a4015",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "UNIVERSITY OF KWAZULU-NATAL\n",
+ "Radio Wave Propagation Modeling under\n",
+ "Precipitation and Clear-air at Microwave\n",
+ "and Millimetric Bands over Wireless Links\n",
+ "in the Horn of Africa\n",
+ "Feyisa Debo Diba\n",
+ "February, 2017\n",
+ "Supervisor: Professor Thomas J. Afullo\n",
+ "Co-supervisor: Dr. Akintunde Ayodeji Alonge\n",
+ "Radio Wave Propagation Modeling under\n",
+ "Precipitation and Clear-air at Microwave\n",
+ "and Millimetric Bands over Wireless Links\n",
+ "in the Horn of Africa\n",
+ "Feyisa Debo Diba\n",
+ "In fulfillment of the Degree of Doctor of Philosophy in\n",
+ "Electronic Engineering, College of Agriculture, Engineering\n",
+ "and Science, University of KwaZulu-Natal, Durban\n",
+ "February, 2017\n",
+ "Supervisor:\n",
+ "As the candidateโs Supervisor, I agree/do not agree to the submission of this thesis\n",
+ "Professor T.J. Afullo โโโโโโโโโโโ-\n",
+ "Dateโโโโโโโโโโโโโโโโโ\n",
+ "Co-Supervisor:\n",
+ "Dr. Akintunde Ayodeji Alonge\n",
+ "As the candidateโs Co.Supervisor, I agree to the submission of this thesis\n",
+ "Dr. A. A. Alonge โโโโโโโโโโโ-\n",
+ "Dateโโโโโโโโโโโโโโโโโ\n",
+ "ii\n",
+ "DECLARATION 1 - PLAGIARISM\n",
+ "I, Feyisa Debo Diba\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Cell 3: Download and extract from PDF URL\n",
+ "import pdfplumber  # imported here (not in the first cell) because it is installed by the pip cell above\n",
+ "from io import BytesIO\n",
+ "\n",
+ "pdf_url = \"https://researchspace.ukzn.ac.za/server/api/core/bitstreams/29218203-bfc8-4fcb-bc63-9afba3341910/content\"\n",
+ "\n",
+ "response = requests.get(pdf_url)\n",
+ "if response.status_code != 200:\n",
+ " raise Exception(f\"Failed to download PDF (Status code: {response.status_code})\")\n",
+ "\n",
+ "with pdfplumber.open(BytesIO(response.content)) as pdf:\n",
+ " thesis_text = \"\\n\".join(page.extract_text() for page in pdf.pages if page.extract_text())\n",
+ "\n",
+ "# Optional Preview\n",
+ "print(thesis_text[:1000])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "84c544db-64a0-4181-beb0-1cc72bc88466",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "# Summary of the Research Thesis\n",
+ "\n",
+ "## 1. Title and Authors\n",
+ "**Title:** Radio Wave Propagation Modeling under Precipitation and Clear-air at Microwave and Millimetric Bands over Wireless Links in the Horn of Africa \n",
+ "**Author:** Feyisa Debo Diba \n",
+ "**Supervisors:** Professor Thomas J. Afullo, Dr. Akintunde Ayodeji Alonge \n",
+ "\n",
+ "## 2. Objective / Research Problem\n",
+ "The thesis investigates radio wave propagation modeling in clear air and precipitation conditions over wireless communication systems in the Horn of Africa, specifically Ethiopia. The research aims to address the attenuation problem caused by precipitation for systems operating at higher frequency bands.\n",
+ "\n",
+ "## 3. Scientific and Regional Background\n",
+ "The congestion of lower operating frequency bands has led to the rapid growth of utilizing higher frequency spectrum for wireless communication systems. However, the Horn of Africa, particularly Ethiopia, lacks comprehensive studies on propagation modeling under different atmospheric conditions. This research provides valuable insights for the region, contributing to the efficient operation of wireless networks.\n",
+ "\n",
+ "## 4. Methodology\n",
+ "The research uses three years of atmospheric data (temperature, pressure, relative humidity) from the National Meteorological Agency of Ethiopia and clear air signal measurements over terrestrial Line-of-Sight (LOS) links from EthioTelecom. Rainfall data from a Davis Vantage weather station installed at Jimma University, Ethiopia, are also used. The study applies the ITU-R model for refractivity gradient prediction and the Rice-Holmberg (R-H) model for one-minute rain rate distribution. A semi-Markovian model is used for rainfall event characterization and generation.\n",
+ "\n",
+ "## 5. Key Findings\n",
+ "The research derived radio climatological parameters for different rain and clear air fade models. It also proposed rainfall rate conversion factors for Ethiopian sites and developed rainfall rate and fade margin contour maps for Ethiopia. The study found that the sojourn time of spikes in every rain regime is appropriately described by Erlang-k distribution. The number of spikes of generated rainfall events and the corresponding sojourn times follow the power-law relationship.\n",
+ "\n",
+ "## 6. Conclusion\n",
+ "The research provides a comprehensive analysis of radio wave propagation under different atmospheric conditions in Ethiopia. The findings contribute to the understanding of the impact of atmospheric conditions on wireless communication systems operating at higher frequency bands.\n",
+ "\n",
+ "## 7. Limitations\n",
+ "The research is limited by the availability and quality of atmospheric and signal level data. The simulation models also have inherent assumptions that may affect the accuracy of the results.\n",
+ "\n",
+ "## 8. Future Work\n",
+ "Future research could focus on refining the models used in this study by incorporating more data and improving the simulation techniques. Studies could also be extended to other regions in the Horn of Africa.\n",
+ "\n",
+ "## 9. Real-World Applications\n",
+ "The findings of this research can improve wireless network planning and 5G deployment in East Africa. The models developed can also be used in link budgeting, which is crucial for the design and operation of wireless communication systems."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Cell 4: Summarize via OpenAI\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": f\"Here is the thesis text (truncated):\\n\\n{thesis_text[:10000]}\"}\n",
+ "]\n",
+ "\n",
+ "response = openai.chat.completions.create(\n",
+ " model=\"gpt-4\",\n",
+ " messages=messages,\n",
+ " temperature=0.3\n",
+ ")\n",
+ "\n",
+ "summary = response.choices[0].message.content.strip()\n",
+ "display(Markdown(summary))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1cdf9ec-5efb-4d4b-8de2-83648865f092",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1-email-subject-implementation.ipynb b/week1/community-contributions/day1-email-subject-implementation.ipynb
new file mode 100644
index 0000000..e968e7c
--- /dev/null
+++ b/week1/community-contributions/day1-email-subject-implementation.ipynb
@@ -0,0 +1,115 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# If you get an error running this cell, then please head over to the troubleshooting notebook!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
+ "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "\n",
+ "system_prompt = \"Eres un analista acostumbrado a trabajar con correos electrรณnicos que contiene un gran conocimiento sobre la mejor manera de resumir contenido releveante \\\n",
+ "dejando de lado cualquier informaciรณn que no despierte interรฉs o no sea el tema principal del correo. Tu funciรณn serรก leer contenido de correos y definir un listado de las 3 mejores opciones con el formato: Opciรณn *numero de la opciรณn*: *sujeto* Motivo: *que palabras clave dentro del texto has utilizado para llegar a esa conclusion y la relaciรณn semรกntica con tu idea\"\n",
+ "user_prompt = \"\"\"\n",
+ "Tengo un correo que le quiero enviar a mi profesor pero no se muy bien como llamarlo, ayudame. El correo es el siguiente:\n",
+ "Hola profe,\n",
+ "Ultimamente estoy disfrutando mucho sus clases y la informaciรณn que presenta me parece muy importante. Este fin de semana me voy de vacaciones y no podrรฉ\n",
+ "ir a sus clases la semana que viene. Me gustarรญa si pudiera pasarme los pdfs de la siguiente semana para echarle un vistazo por mi cuenta durante mi ausencia en Francia.\n",
+ "\n",
+ "Un saludo,\n",
+ "Daniel.\n",
+ "\"\"\"\n",
+ "\n",
+ "# Step 2: Make the messages list\n",
+ "\n",
+ "messages = [{\"role\" : \"system\" , \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}]\n",
+ "\n",
+ "# Step 3: Call OpenAI\n",
+ "\n",
+ "response = openai.chat.completions.create( \n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages)\n",
+ "\n",
+ "# Step 4: print the result\n",
+ "\n",
+ "print(response.choices[0].message.content)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1-research-paper-summarizer-with-highlighter.ipynb b/week1/community-contributions/day1-research-paper-summarizer-with-highlighter.ipynb
new file mode 100644
index 0000000..74a00f9
--- /dev/null
+++ b/week1/community-contributions/day1-research-paper-summarizer-with-highlighter.ipynb
@@ -0,0 +1,202 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "5c527a13-459e-4a46-b00e-f2c5056de155",
+ "metadata": {},
+ "source": [
+ "# Research Paper Summarizer with Text Highlighting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "861a0be5-6da7-4f66-8f82-bc083a913f9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "74bf6765-53b6-457b-ac2d-0d1afa7fbf8f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "API key found and looks good so far!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "227ed7af-d539-4c87-988b-80e6e049c863",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
+ "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "dcaadf8b-456d-48ca-af9d-9f57d3414308",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "6315093f-be68-408e-a5e1-6a2e4ea675e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at an article website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "I'm also looking for complete statements containing the following keywords (if found): \\\n",
+ "'large circuit model', 'ChipGPT' \\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt\n",
+ "\n",
+ "\n",
+ "article = Website(\"https://arxiv.org/html/2401.12224v1\")\n",
+ "# print(user_prompt_for(article))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ff8a4112-f118-4866-b6cf-82675de0a38d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of a scientific \\\n",
+ "article for a PhD student (who has to read a lot of papers and journals). The \\\n",
+ "user will provide the article website and keyword(s) they are looking to learn and \\\n",
+ "cite from. Your job is to summarize the paper and point out all the statements \\\n",
+ "containing the specific keyword(s) the user typed. \\\n",
+ "Respond in markdown.\"\n",
+ "\n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]\n",
+ "\n",
+ " \n",
+ "#messages_for(article)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b5e47bea-403d-48c3-ab9d-4d6adef83241",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content\n",
+ "\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9f6ac1bc-5bc8-4daa-8174-d201400e517a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://arxiv.org/html/2401.12224v1\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1-selenium-web-summary-es-mx.ipynb b/week1/community-contributions/day1-selenium-web-summary-es-mx.ipynb
new file mode 100644
index 0000000..2a3de8b
--- /dev/null
+++ b/week1/community-contributions/day1-selenium-web-summary-es-mx.ipynb
@@ -0,0 +1,260 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "2588fbba",
+ "metadata": {},
+ "source": [
+ "# Website Analysis and Summarization with Selenium and OpenAI\n",
+ "\n",
+ "> This notebook demonstrates how to extract and summarize the main content of any website using Selenium for dynamic extraction and OpenAI for generating concise summaries in Mexican Spanish.\n",
+ "\n",
+ "## Overview\n",
+ "This notebook provides a workflow to automatically analyze websites, extract relevant text, and generate a short summary using a language model. Navigation elements are ignored, focusing on news, announcements, and main content.\n",
+ "\n",
+ "## Features\n",
+ "- Extracts relevant text from web pages using Selenium and BeautifulSoup.\n",
+ "- Generates automatic summaries using OpenAI's language models.\n",
+ "- Presents results in markdown format.\n",
+ "\n",
+ "## Requirements\n",
+ "- Python 3.8+\n",
+ "- Google Chrome browser installed\n",
+ "- The following Python packages:\n",
+ " - selenium\n",
+ " - webdriver-manager\n",
+ " - beautifulsoup4\n",
+ " - openai\n",
+ " - python-dotenv\n",
+ " - requests\n",
+ "- An OpenAI API key (project key, starting with `sk-proj-`)\n",
+ "- Internet connection\n",
+ "\n",
+ "## How to Use\n",
+ "1. Install the required packages:\n",
+ " ```bash\n",
+ " pip install selenium webdriver-manager undetected-chromedriver beautifulsoup4 openai python-dotenv requests\n",
+ " ```\n",
+ "2. Add your OpenAI API key to a `.env` file as `OPENAI_API_KEY`.\n",
+ "3. Run the notebook cells in order. You can change the target website URL in the code to analyze different sites.\n",
+ "4. The summary will be displayed in markdown format below the code cell.\n",
+ "\n",
+ "**Note:** Some websites may block automated access. The notebook includes options to simulate a real user and avoid bot detection, but results may vary depending on the site's protections.\n",
+ "\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dc7c2ade",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Imports\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "from selenium import webdriver\n",
+ "from selenium.webdriver.chrome.service import Service\n",
+ "from selenium.webdriver.common.by import By\n",
+ "from selenium.webdriver.chrome.options import Options\n",
+ "from selenium.webdriver.support.ui import WebDriverWait\n",
+ "from selenium.webdriver.support import expected_conditions as EC\n",
+ "from webdriver_manager.chrome import ChromeDriverManager\n",
+ "import undetected_chromedriver as uc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2d21987",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the environment variables from .env\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbb3a8ed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5313aa64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Website:\n",
+ " def __init__(self, url, headless=True, wait_time=10):\n",
+ " self.url = url # Website URL to analyze\n",
+ " self.title = None # Title of the website\n",
+ " self.text = None # Extracted text from the website\n",
+ " \n",
+ " # Chrome options configuration for Selenium\n",
+ " options = Options()\n",
+ " if headless:\n",
+ " options.add_argument(\"--headless=new\") # Run Chrome in headless mode (no window)\n",
+ " options.add_argument(\"--disable-gpu\") # Disable GPU acceleration\n",
+ " options.add_argument(\"--no-sandbox\") # Disable Chrome sandbox (required for some environments)\n",
+ " options.add_argument(\"--window-size=1920,1080\") # Set window size to simulate a real user\n",
+ " # Simulate a real user-agent to avoid bot detection\n",
+ " options.add_argument(\"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\")\n",
+ " \n",
+ " # Initialize Chrome WebDriver\n",
+ " self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)\n",
+ " self.driver.get(url) # Open the URL in the browser\n",
+ " \n",
+ " try:\n",
+ " # Wait until the element is present in the page\n",
+ " WebDriverWait(self.driver, wait_time).until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
+ " html = self.driver.page_source # Get the full HTML of the page\n",
+ " soup = BeautifulSoup(html, 'html.parser') # Parse HTML with BeautifulSoup\n",
+ " self.title = soup.title.string if soup.title else 'No title found' # Extract the title\n",
+ " if soup.body:\n",
+ " # Remove irrelevant elements from the body\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " # Extract clean text from the body\n",
+ " self.text = soup.body.get_text(separator='\\n', strip=True)\n",
+ " else:\n",
+ " self.text = \"No body found\" # If no body is found, indicate it\n",
+ " except Exception as e:\n",
+ " print(f\"Error accessing the site: {e}\") # Print error to console\n",
+ " self.text = \"Error accessing the site\" # Store error in the attribute\n",
+ " finally:\n",
+ " self.driver.quit() # Always close the browser, whether or not an error occurred"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e902c6b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown in Mexican Spanish.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eaee8f36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9ac4ed8b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creates messages for the OpenAI API\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1536d537",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creates a summary for the given URL\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fe135339",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shows the summary for the given URL\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a301ab4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://openai.com/\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1.ipynb b/week1/community-contributions/day1.ipynb
new file mode 100644
index 0000000..b876e38
--- /dev/null
+++ b/week1/community-contributions/day1.ipynb
@@ -0,0 +1,817 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
+ "metadata": {},
+ "source": [
+ "# YOUR FIRST LAB\n",
+ "### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
+ "\n",
+ "## Your first Frontier LLM Project\n",
+ "\n",
+ "Let's build a useful LLM solution - in a matter of minutes.\n",
+ "\n",
+ "By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
+ "\n",
+ "Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
+ "\n",
+ "Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
+ "\n",
+ "## If you're new to Jupyter Lab\n",
+ "\n",
+ "Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
+ "\n",
+ "I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
+ "\n",
+ "## If you're new to the Command Line\n",
+ "\n",
+ "Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
+ "\n",
+ "## If you'd prefer to work in IDEs\n",
+ "\n",
+ "If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
+ "If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
+ "\n",
+ "## If you'd like to brush up your Python\n",
+ "\n",
+ "I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
+ "`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
+ "\n",
+ "## I am here to help\n",
+ "\n",
+ "If you have any problems at all, please do reach out. \n",
+ "I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
+ "And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done ๐ \n",
+ "\n",
+ "## More troubleshooting\n",
+ "\n",
+ "Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
+ "\n",
+ "## For foundational technical knowledge (eg Git, APIs, debugging) \n",
+ "\n",
+ "If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. ๐ I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
+ "\n",
+ "This covers Git and GitHub; what they are, the difference, and how to use them: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
+ "\n",
+ "This covers technical foundations: \n",
+ "ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
+ "\n",
+ "This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
+ "\n",
+ "This covers the essential techniques for figuring out errors: \n",
+ "https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
+ "\n",
+ "And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
+ "\n",
+ "## If this is old hat!\n",
+ "\n",
+ "If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Please read - important note\n",
+ " The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, after watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " This code is a live resource - keep an eye out for my emails\n",
+ " I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.
\n",
+ " I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " Business value of these exercises\n",
+ " A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.\n",
+ " | \n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# If you get an error running this cell, then please head over to the troubleshooting notebook!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
+ "metadata": {},
+ "source": [
+ "# Connecting to OpenAI (or Ollama)\n",
+ "\n",
+ "The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
+ "\n",
+ "If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
+ "\n",
+ "## Troubleshooting if you have problems:\n",
+ "\n",
+ "Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
+ "\n",
+ "If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
+ "\n",
+ "Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
+ "\n",
+ "Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
+ "# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
+ "metadata": {},
+ "source": [
+ "# Let's make a quick call to a Frontier model to get started, as a preview!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hello! Itโs great to hear from you! How can I help you today?\n"
+ ]
+ }
+ ],
+ "source": [
+ "# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
+ "\n",
+ "message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2aa190e5-cb31-456a-96cc-db109919cd78",
+ "metadata": {},
+ "source": [
+ "## OK onwards with our first project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c5e793b2-6775-426a-a139-4848291d0463",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Home - Edward Donner\n",
+ "Home\n",
+ "Connect Four\n",
+ "Outsmart\n",
+ "An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
+ "About\n",
+ "Posts\n",
+ "Well, hi there.\n",
+ "Iโm Ed. I like writing code and experimenting with LLMs, and hopefully youโre here because you do too. I also enjoy DJing (but Iโm badly out of practice), amateur electronic music production (\n",
+ "very\n",
+ "amateur) and losing myself in\n",
+ "Hacker News\n",
+ ", nodding my head sagely to things I only half understand.\n",
+ "Iโm the co-founder and CTO of\n",
+ "Nebula.io\n",
+ ". Weโre applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Iโm previously the founder and CEO of AI startup untapt,\n",
+ "acquired in 2021\n",
+ ".\n",
+ "We work with groundbreaking, proprietary LLMs verticalized for talent, weโve\n",
+ "patented\n",
+ "our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
+ "Connect\n",
+ "with me for more!\n",
+ "May 28, 2025\n",
+ "Connecting my courses โ become an LLM expert and leader\n",
+ "May 18, 2025\n",
+ "2025 AI Executive Briefing\n",
+ "April 21, 2025\n",
+ "The Complete Agentic AI Engineering Course\n",
+ "January 23, 2025\n",
+ "LLM Workshop โ Hands-on with Agents โ resources\n",
+ "Navigation\n",
+ "Home\n",
+ "Connect Four\n",
+ "Outsmart\n",
+ "An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
+ "About\n",
+ "Posts\n",
+ "Get in touch\n",
+ "ed [at] edwarddonner [dot] com\n",
+ "www.edwarddonner.com\n",
+ "Follow me\n",
+ "LinkedIn\n",
+ "Twitter\n",
+ "Facebook\n",
+ "Subscribe to newsletter\n",
+ "Type your emailโฆ\n",
+ "Subscribe\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Let's try one out. Change the website and add print statements to follow along.\n",
+ "\n",
+ "ed = Website(\"https://edwarddonner.com\")\n",
+ "print(ed.title)\n",
+ "print(ed.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
+ "metadata": {},
+ "source": [
+ "## Types of prompts\n",
+ "\n",
+ "You may know this already - but if not, you will get very familiar with it!\n",
+ "\n",
+ "Models like GPT4o have been trained to receive instructions in a particular way.\n",
+ "\n",
+ "They expect to receive:\n",
+ "\n",
+ "**A system prompt** that tells them what task they are performing and what tone they should use\n",
+ "\n",
+ "**A user prompt** -- the conversation starter that they should reply to"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
+ "\n",
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "You are looking at a website titled Home - Edward Donner\n",
+ "The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n",
+ "\n",
+ "Home\n",
+ "Connect Four\n",
+ "Outsmart\n",
+ "An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
+ "About\n",
+ "Posts\n",
+ "Well, hi there.\n",
+ "Iโm Ed. I like writing code and experimenting with LLMs, and hopefully youโre here because you do too. I also enjoy DJing (but Iโm badly out of practice), amateur electronic music production (\n",
+ "very\n",
+ "amateur) and losing myself in\n",
+ "Hacker News\n",
+ ", nodding my head sagely to things I only half understand.\n",
+ "Iโm the co-founder and CTO of\n",
+ "Nebula.io\n",
+ ". Weโre applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Iโm previously the founder and CEO of AI startup untapt,\n",
+ "acquired in 2021\n",
+ ".\n",
+ "We work with groundbreaking, proprietary LLMs verticalized for talent, weโve\n",
+ "patented\n",
+ "our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
+ "Connect\n",
+ "with me for more!\n",
+ "May 28, 2025\n",
+ "Connecting my courses โ become an LLM expert and leader\n",
+ "May 18, 2025\n",
+ "2025 AI Executive Briefing\n",
+ "April 21, 2025\n",
+ "The Complete Agentic AI Engineering Course\n",
+ "January 23, 2025\n",
+ "LLM Workshop โ Hands-on with Agents โ resources\n",
+ "Navigation\n",
+ "Home\n",
+ "Connect Four\n",
+ "Outsmart\n",
+ "An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
+ "About\n",
+ "Posts\n",
+ "Get in touch\n",
+ "ed [at] edwarddonner [dot] com\n",
+ "www.edwarddonner.com\n",
+ "Follow me\n",
+ "LinkedIn\n",
+ "Twitter\n",
+ "Facebook\n",
+ "Subscribe to newsletter\n",
+ "Type your emailโฆ\n",
+ "Subscribe\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(user_prompt_for(ed))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
+ "metadata": {},
+ "source": [
+ "## Messages\n",
+ "\n",
+ "The API from OpenAI expects to receive messages in a particular structure.\n",
+ "Many of the other APIs share this structure:\n",
+ "\n",
+ "```python\n",
+ "[\n",
+ " {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
+ " {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
+ "]\n",
+ "```\n",
+ "To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
+ " {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Oh, youโre going for the big math questions now, huh? Well, if you insist on dragging me into elementary school territory, the answer is 4. Shocking, I know.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# To give you a preview -- calling OpenAI with system and user messages:\n",
+ "\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
+ "metadata": {},
+ "source": [
+ "## And now let's build useful messages for GPT-4o-mini, using a function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# See how this function creates exactly the format above\n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'role': 'system',\n",
+ " 'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},\n",
+ " {'role': 'user',\n",
+ " 'content': 'You are looking at a website titled Home - Edward Donner\\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\\n\\nHome\\nConnect Four\\nOutsmart\\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\\nAbout\\nPosts\\nWell, hi there.\\nIโm Ed. I like writing code and experimenting with LLMs, and hopefully youโre here because you do too. I also enjoy DJing (but Iโm badly out of practice), amateur electronic music production (\\nvery\\namateur) and losing myself in\\nHacker News\\n, nodding my head sagely to things I only half understand.\\nIโm the co-founder and CTO of\\nNebula.io\\n. Weโre applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Iโm previously the founder and CEO of AI startup untapt,\\nacquired in 2021\\n.\\nWe work with groundbreaking, proprietary LLMs verticalized for talent, weโve\\npatented\\nour matching model, and our award-winning platform has happy customers and tons of press coverage.\\nConnect\\nwith me for more!\\nMay 28, 2025\\nConnecting my courses โ become an LLM expert and leader\\nMay 18, 2025\\n2025 AI Executive Briefing\\nApril 21, 2025\\nThe Complete Agentic AI Engineering Course\\nJanuary 23, 2025\\nLLM Workshop โ Hands-on with Agents โ resources\\nNavigation\\nHome\\nConnect Four\\nOutsmart\\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\\nAbout\\nPosts\\nGet in touch\\ned [at] edwarddonner [dot] com\\nwww.edwarddonner.com\\nFollow me\\nLinkedIn\\nTwitter\\nFacebook\\nSubscribe to newsletter\\nType your emailโฆ\\nSubscribe'}]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Try this out, and then try for a few more websites\n",
+ "\n",
+ "messages_for(ed)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
+ "metadata": {},
+ "source": [
+ "## Time to bring it together - the API for OpenAI is very simple!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And now: call the OpenAI API. You will get very familiar with this!\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model = \"gpt-4o-mini\",\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'# Summary of Edward Donner\\'s Website\\n\\nThe website is the personal platform of Ed Donner, a software engineer and co-founder/CTO of Nebula.io, an AI-focused company that aims to help individuals discover their potential through technology. Ed expresses his passion for coding, experimenting with large language models (LLMs), and interests in DJing and electronic music production.\\n\\n## Key Sections:\\n- **About Ed**: Provides personal background, detailing his experience in AI startups, including his previous venture, untapt, which was acquired in 2021. He highlights the use of patented matching models and LLMs in talent management.\\n- **Connect Four & Outsmart**: Features interactive games or platforms where LLMs engage in diplomatic and strategic challenges.\\n- **Courses & Announcements**:\\n - **May 28, 2025**: Announced a course focused on becoming an LLM expert and leader.\\n - **May 18, 2025**: Announcement for the 2025 AI Executive Briefing.\\n - **April 21, 2025**: Introduction of \"The Complete Agentic AI Engineering Course.\"\\n - **January 23, 2025**: A workshop providing hands-on experience with agents and associated resources.\\n\\nThe website also encourages visitors to connect and engage through various social media platforms and a newsletter subscription.'"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summarize(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "3d926d59-450e-4609-92ba-2d6f244f1342",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function to display this nicely in the Jupyter output, using markdown\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "3018853a-445f-41ff-9560-d925d1774b2f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "# Summary of \"Home - Edward Donner\"\n",
+ "\n",
+ "The website is dedicated to Edward Donner, a co-founder and CTO of Nebula.io, a platform focused on leveraging AI to assist individuals in discovering their potential and engaging with talent. Edward expresses an interest in coding, experiments with large language models (LLMs), DJing, and electronic music production. He has a history as the founder of an AI startup, untapt, which was acquired in 2021.\n",
+ "\n",
+ "## Key Features:\n",
+ "- **Connect Four**: A game involving LLMs competing in diplomacy and strategy.\n",
+ "- **About**: Information about Edward's professional background and interests.\n",
+ "- **Courses and Workshops**: \n",
+ " - **Recent Announcements**:\n",
+ " - **May 28, 2025**: Launch of a program to become an LLM expert and leader.\n",
+ " - **May 18, 2025**: Announcement of a 2025 AI Executive Briefing.\n",
+ " - **April 21, 2025**: Introduction of the Complete Agentic AI Engineering Course.\n",
+ " - **January 23, 2025**: A hands-on LLM Workshop focusing on resources related to agents.\n",
+ "\n",
+ "The content emphasizes his passion for AI and education within the industry."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display_summary(\"https://edwarddonner.com\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
+ "metadata": {},
+ "source": [
+ "# Let's try more websites\n",
+ "\n",
+ "Note that this will only work on websites that can be scraped using this simplistic approach.\n",
+ "\n",
+ "Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
+ "\n",
+ "Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
+ "\n",
+ "But many websites will work just fine!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "45d83403-a24c-44b5-84ac-961449b4008f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "# CNN Website Summary\n",
+ "\n",
+ "CNN is a major news outlet that provides comprehensive coverage of world events, featuring sections on U.S. and international news, political analysis, business, health, entertainment, and sports. The site emphasizes real-time updates and includes various multimedia formats such as videos and articles.\n",
+ "\n",
+ "### Notable Articles and Updates\n",
+ "\n",
+ "- **Ukraine-Russia War**: Pro-Ukraine protests are ongoing, and there are discussions regarding dignitaries meeting on U.S. soil amidst rising tensions.\n",
+ " \n",
+ "- **Israel-Hamas Conflict**: Analysis and reports highlight significant developments including Israelโs settlement plans which may impact the future of a Palestinian state.\n",
+ "\n",
+ "- **Health**: New heart health guidelines suggest going alcohol-free; studies indicate a high level of stress among teenagers.\n",
+ "\n",
+ "- **Entertainment**:\n",
+ " - Megadeth is set to release its final album and embark on a farewell tour.\n",
+ " - Taylor Swift's recent appearances are noted for cultural impact.\n",
+ "\n",
+ "- **Science**: Climate-related findings unveil vulnerabilities in GPS and satellites due to pollution.\n",
+ "\n",
+ "- **Business**: Discussions are ongoing about potential government stake in Intel, affecting stock prices.\n",
+ "\n",
+ "### Additional Features\n",
+ "CNN also offers a variety of interactive content including quizzes, games, and newsletters tailored to reader interests. The site encourages user engagement through feedback on advertisements and technical issues.\n",
+ "\n",
+ "Overall, CNN remains a significant source for breaking news and in-depth analysis across a broad spectrum of topics."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display_summary(\"https://cnn.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75e9fd40-b354-4341-991e-863ef2e59db7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://anthropic.com\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Subject: Leave Notification: Medical Emergency (Aug 17-21, 2025)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Step 1: Create your prompts\n",
+ "\n",
+ "system_prompt = \"you are the email assistant, which provide the subject of the email\"\n",
+ "user_prompt = \"\"\"\n",
+ " please provide the appropriate subject for below email\n",
+ "hi team,\n",
+ "due to some medical emergency , i will be on leave for 5 days starting\n",
+ "from 17-08-2025 to 21-08-2025.\n",
+ "\n",
+ "please call me in case of any urgency.\n",
+ "\n",
+ "regards\n",
+ "Rahul\n",
+ "\"\"\"\n",
+ "\n",
+ "# Step 2: Make the messages list\n",
+ "\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ "]\n",
+ "\n",
+ "# Step 3: Call OpenAI\n",
+ "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
+ "\n",
+ "\n",
+ "# Step 4: print the result\n",
+ "\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
+ "metadata": {},
+ "source": [
+ "## An extra exercise for those who enjoy web scraping\n",
+ "\n",
+ "You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
+ "metadata": {},
+ "source": [
+ "# Sharing your code\n",
+ "\n",
+ "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
+ "\n",
+ "If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
+ "\n",
+ "Here are good instructions courtesy of an AI friend: \n",
+ "https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1_coverletter_tailored_to_CV_and_job_description.ipynb b/week1/community-contributions/day1_coverletter_tailored_to_CV_and_job_description.ipynb
new file mode 100644
index 0000000..9c63b6a
--- /dev/null
+++ b/week1/community-contributions/day1_coverletter_tailored_to_CV_and_job_description.ipynb
@@ -0,0 +1,211 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d955d75d-4970-48fe-983e-a2a850cecfc5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "import PyPDF2\n",
+ "from selenium import webdriver\n",
+ "from selenium.webdriver.chrome.options import Options\n",
+ "from selenium.webdriver.chrome.service import Service\n",
+ "from webdriver_manager.chrome import ChromeDriverManager\n",
+ "from bs4 import BeautifulSoup\n",
+ "import time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6e1e5dd3-f91a-466b-8fd4-2dbf4eedf101",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override = True)\n",
+ "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"API key doesn't look correct, check it\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"It looks like API key has an extra space - check it\")\n",
+ "else:\n",
+ " print(\"API key looks good, moving on!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67a6e583-1ef7-4b77-8886-c0e8c619933c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "34a07806-dd68-4a86-8b6e-e1b2aaf0daa1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# path to the CV\n",
+ "path = \"/Users/yanasklar/Documents/For applying/CV/ะกV_YanaSklyar_c.pdf\"\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Vacancy:\n",
+ " def __init__(self, url, instructions = \"\"):\n",
+ " self.url = url\n",
+ " \n",
+ " # configure Chrome settings\n",
+ " options = Options()\n",
+ " # options.add_argument(\"--headless\") \n",
+ " \"\"\"\n",
+ " Headless mode runs the browser in the background (invisible).\n",
+ " However, some websites (like openai.com) block headless browsers.\n",
+ " So if this line is active, the page may not load correctly and you may not get the full content.\n",
+ " \"\"\"\n",
+ " options.add_argument(\"--disable-gpu\")\n",
+ " options.add_argument(\"--no-sandbox\")\n",
+ " options.add_argument(\"--window-size=1920x1080\")\n",
+ "\n",
+ " # use webdriver-manager to manage ChromeDriver\n",
+ " service = Service(ChromeDriverManager().install())\n",
+ " driver = webdriver.Chrome(service=service, options=options)\n",
+ " driver.get(url)\n",
+ " time.sleep(3) # let the page load\n",
+ "\n",
+ " # take the source of the page\n",
+ " page_source = driver.page_source\n",
+ " driver.quit()\n",
+ "\n",
+ " # analyse with BeautifulSoup\n",
+ " soup = BeautifulSoup(page_source, 'html.parser')\n",
+ "\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"img\", \"script\", \"style\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator='\\n', strip=True)\n",
+ "\n",
+ " # read CV\n",
+ " with open(path, 'rb') as f:\n",
+ " reader = PyPDF2.PdfReader(f)\n",
+ " cv_text = \"\"\n",
+ " for page in reader.pages:\n",
+ " text = page.extract_text()\n",
+ " if text:\n",
+ " cv_text += text + \"\\n\"\n",
+ " self.cv_text = cv_text\n",
+ "\n",
+ " # summarise and print the description of the job\n",
+ " message = f\"\"\"Here is the content of a webpage: {self.text}.\n",
+ " Find job description on that page,\n",
+ " summarise it, include the list requirements and other important details.\n",
+ " \"\"\"\n",
+ " messages = [{\"role\":\"user\", \"content\":message}]\n",
+ " response = openai.chat.completions.create(model='gpt-4o-mini', messages = messages)\n",
+ " print(\"The job description: \", response.choices[0].message.content)\n",
+ "\n",
+ " # create prompts\n",
+ " self.system_prompt = \"\"\"You are a career assistant specializing in writing cover letter.\n",
+ " Your tasks:\n",
+ " 1. Read the candidate's CV (provided as text).\n",
+ " 2. Read the job description (provided from a webpage).\n",
+ " 3. Write a concise and compelling cover letter, that:\n",
+ " - Hightlights the most relevant experience and skills from the CV,\n",
+ " - Aligns directly wit the requirements in the job description,\n",
+ " - Adapts to cultural and professional norms in Israel.\n",
+ " The letter should be no longer than half a page, persuasive and tailored to make the applicant stand out.\n",
+ " \"\"\"\n",
+ "\n",
+ " user_prompt = f\"\"\"\n",
+ " Here is my CV:\n",
+ " {self.cv_text}\n",
+ " \n",
+ " The job vacancy is from the website {self.title}.\n",
+ " Here is the decription of the vacancy:\n",
+ " {self.text}\n",
+ " Please write a cover letter that connects my background to this vacancy.\n",
+ " Make it persuasive and suitable for Israeli job market.\n",
+ " \"\"\"\n",
+ " \n",
+ " if instructions:\n",
+ " user_prompt += f\"Additional instructions: {instructions}\"\n",
+ " self.user_prompt = user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9160b9f5-177b-4477-8e54-3a212f275a22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def cover_letter(url, instructions = \"\"):\n",
+ " vacancy = Vacancy(url, instructions)\n",
+ " messages = [\n",
+ " {\"role\":\"system\", \"content\":vacancy.system_prompt},\n",
+ " {\"role\":\"user\", \"content\":vacancy.user_prompt}\n",
+ " ]\n",
+ " response = openai.chat.completions.create(model='gpt-4o-mini', messages=messages)\n",
+ " if not response:\n",
+ " print(\"smt went wrong\")\n",
+ " print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1de4b55c-a8da-445f-9865-c7a8bafdbc3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = \"https://www.linkedin.com/jobs/view/4285898438/?alternateChannel=search&eBP=CwEAAAGY3R5LOabDLOVTy6xvBcSlWyAkIXQz8IRkSM3rgsqTPtvcEvUSnq980O7oLV2Hh_ldTpc2cBBmRq1IRnLtp7TzEcUvndFEXeCuviA5yo7oFYfW7KoEp4SPNzmf3D9LtnSgk9Iudy3skk6n3hVOtyDpx8Zm0AiTWPvdwCaZ_w5Xu8lAG797NRNDco71ynm99LmCOC9Go7DdDQ2eLewamc4SOsA4xWcXy0GmZVy3kBF1AprK3ylAYR2wrm5-hp4lRpbbfUxXjkEOG6H_GbPpKtN-N8mYnMd9w_cej5qQmTFX86gqSi6HuXFtK0h46TbOS5r-YQksVd1Yb4kYZnDznWXPLbxp04xVJSPzsHoa05wQdOfZ2UUSoMTJmic3n3qfV2u9Bp8n4sLYtINpzKdvm4eADGGkN-nR3O2oPeas9XjGbBwNdjXHAcX_PJoRwlFdQ1gVkYQEF1T7qAfXUJoUt-fv4oLxGnIgV6yJuMgw&refId=9NA7Bvt%2FhCqDkFNRGu1dPA%3D%3D&trackingId=W11hvpcIjHA%2FjU%2FFZ%2B1uAA%3D%3D\"\n",
+ "b = \"The style of the cover letter should informal, as if i talked to a friend about my background\"\n",
+ "cover_letter(a, b)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0feb3cbe-686a-4a97-9ca3-a0cb32a24c5d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python (llms)",
+ "language": "python",
+ "name": "llms"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day1_selenium_microsoftedgedriver.ipynb b/week1/community-contributions/day1_selenium_microsoftedgedriver.ipynb
new file mode 100644
index 0000000..c2e6b86
--- /dev/null
+++ b/week1/community-contributions/day1_selenium_microsoftedgedriver.ipynb
@@ -0,0 +1,123 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ba06289-d17a-4ccd-85f5-2b79956d4e59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install selenium"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "935fe7b1-1807-4f75-863d-4c118e425a19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pip show selenium"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eabbbc62-1de1-4883-9b3e-9c90145ea6c5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from selenium import webdriver\n",
+ "from selenium.webdriver.edge.options import Options as EdgeOptions # Import EdgeOptions\n",
+ "from selenium.webdriver.edge.service import Service as EdgeService # Import EdgeService\n",
+ "from bs4 import BeautifulSoup\n",
+ "import time\n",
+ "import os\n",
+ "\n",
+ "class Website:\n",
+ " def __init__(self, url, driver_path=None, wait_time=3):\n",
+ " self.url = url\n",
+ " self.wait_time = wait_time\n",
+ "\n",
+ " # Headless Edge settings\n",
+ " options = EdgeOptions() # Use EdgeOptions\n",
+ " # options.add_argument(\"--headless\")\n",
+ " options.add_argument(\"--disable-gpu\")\n",
+ " options.add_argument(\"--no-sandbox\")\n",
+ " options.add_argument(\"--window-size=1920x1080\")\n",
+ "\n",
+ " # Driver path\n",
+ " if driver_path:\n",
+ " # For Edge, you might need to specify the path to msedgedriver\n",
+ " # For driver download, https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH#downloads\n",
+ " service = EdgeService(executable_path=driver_path) # Use EdgeService\n",
+ " else:\n",
+ " # If msedgedriver.exe is in your system's PATH, you can omit executable_path\n",
+ " service = EdgeService()\n",
+ "\n",
+ " # Start browser\n",
+ " # Use webdriver.Edge() for Microsoft Edge\n",
+ " driver = webdriver.Edge(service=service, options=options)\n",
+ " driver.get(url)\n",
+ "\n",
+ " # Wait for the loading page\n",
+ " time.sleep(self.wait_time)\n",
+ "\n",
+ " # Take page source\n",
+ " html = driver.page_source\n",
+ " driver.quit()\n",
+ "\n",
+ " # Analysis with BeautifulSoup \n",
+ " soup = BeautifulSoup(html, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ "\n",
+ " # Clean irrelevant tags\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ "\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "852c52e2-bd4d-4bb9-94ef-e498c33f1a89",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "site = Website(\"https://openai.com\", driver_path=\"/Users/klee/Documents/edgedriver_mac64_m1/msedgedriver\")\n",
+ "print(\"Title:\", site.title)\n",
+ "print(\"\\nFirst 500 character:\\n\", site.text[:500])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7620c685-c35c-4d6b-aaf1-a3da98f19ca7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day2_exercise_using_input.ipynb b/week1/community-contributions/day2_exercise_using_input.ipynb
new file mode 100644
index 0000000..f616d96
--- /dev/null
+++ b/week1/community-contributions/day2_exercise_using_input.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
+ "metadata": {},
+ "source": [
+ "# Welcome to your first assignment!\n",
+ "\n",
+ "Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Constants\n",
+ "\n",
+ "OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
+ "HEADERS = {\"Content-Type\": \"application/json\"}\n",
+ "MODEL = \"llama3.2\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a messages list using the same format that we used for OpenAI\n",
+ "\n",
+ "messages = [\n",
+ " {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "payload = {\n",
+ " \"model\": MODEL,\n",
+ " \"messages\": messages,\n",
+ " \"stream\": False\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ollama\n",
+ "\n",
+ "response = ollama.chat(model=MODEL, messages=messages)\n",
+ "print(response['message']['content'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
+ "metadata": {},
+ "source": [
+ "## Alternative approach - using OpenAI python library to connect to Ollama"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# There's actually an alternative approach that some people might prefer\n",
+ "# You can use the OpenAI client python library to call Ollama:\n",
+ "\n",
+ "from openai import OpenAI\n",
+ "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
+ "\n",
+ "response = ollama_via_openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=messages\n",
+ ")\n",
+ "\n",
+ "print(response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
+ "metadata": {},
+ "source": [
+ "# NOW the exercise for you\n",
+ "\n",
+ "Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0c1f84c4-4cc0-4085-8ea5-871a8ca46a47",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import ollama"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "890852ab-2cd4-41dc-b168-6bd1360b967a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "MODEL = \"llama3.2\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "9d398f9a-c66e-42b5-91b4-5417944b8408",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_prompt_generator(website) -> str:\n",
+ " user_prompt = f\"You will act as a website summarizer with knowledge of Web Content Accessibility Guidelines. You will look into the web: {website.title} and \"\n",
+ " user_prompt += \"break down the relevant information about it in this categories: What is the website about, \\\n",
+ " to whom the website belongs and what practises should improve to have a better user experience. \\n\\n\"\n",
+ " user_prompt += website.text\n",
+ "\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "156d7c67-b714-4156-9f69-faf0c50aaf13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def messages_generator(user_prompt : str) -> list[dict[str, str]]:\n",
+ " messages = [{\"role\" : \"user\", \"content\" : user_prompt}]\n",
+ "\n",
+ " return messages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "f07c4143-6cc5-4d28-846c-a373564e9264",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_request_reader() -> str:\n",
+ " while True:\n",
+ " website_url = input(\"Define what website you want to summarize by giving the url: \")\n",
+ " if website_url.lower().startswith(\"http\"):\n",
+ " return website_url\n",
+ " print(\"URL not valid. Please provide a full url.\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "94933255-2ca8-40b5-8f74-865d3e781058",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def summarizer_bot():\n",
+ " website_url = user_request_reader()\n",
+ " website = Website(website_url)\n",
+ " \n",
+ " user_prompt = user_prompt_generator(website)\n",
+ " messages = messages_generator(user_prompt)\n",
+ "\n",
+ " response = ollama.chat(model=MODEL, messages=messages)\n",
+ " print(response['message']['content'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "2d81faa4-25b3-4d5d-8f36-93772e449b5c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Define what website you want to summarize by giving the url: test.com\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "URL not valid. Please provide a full url.\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Define what website you want to summarize by giving the url: https://edwarddonner.com\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "**Summary:**\n",
+ "\n",
+ "The website \"Home - Edward Donner\" belongs to Edward Donner, a co-founder and CTO of Nebula.io, an AI startup. The website is about Edward's interests in writing code, experimenting with Large Language Models (LLMs), and DJing, as well as his work in applying AI to help people discover their potential.\n",
+ "\n",
+ "**Categories:**\n",
+ "\n",
+ "### What is the website about?\n",
+ "\n",
+ "The website is primarily about Edward Donner's personal brand, showcasing his expertise in AI and LLMs. It includes information about his work at Nebula.io, which applies AI to talent management. The website also features a \"Connect Four\" arena where LLMs compete against each other, as well as sections for learning more about LLMs and staying up-to-date with Edward's courses and publications.\n",
+ "\n",
+ "### To whom does the website belong?\n",
+ "\n",
+ "The website belongs to Edward Donner, a co-founder and CTO of Nebula.io. It appears to be a personal website or blog, showcasing his expertise and interests in AI and LLMs.\n",
+ "\n",
+ "### Practices to improve for better user experience:\n",
+ "\n",
+ "1. **Clearer navigation**: The website's menu is simple but not intuitive. Adding clear categories or sections would help users quickly find the information they're looking for.\n",
+ "2. **More detailed about section**: The \"About\" section provides a brief overview of Edward's work and interests, but it could be more detailed and comprehensive.\n",
+ "3. **Improved accessibility**: While the website is likely following general web accessibility guidelines, there are no clear indications of this on the page. Adding alt text to images, providing a clear font size and color scheme, and ensuring sufficient contrast between background and foreground would improve the user experience for people with disabilities.\n",
+ "4. **Better calls-to-action (CTAs)**: The website could benefit from more prominent CTAs, guiding users towards specific actions such as signing up for courses or following Edward on social media.\n",
+ "5. **SEO optimization**: The website's content and meta tags appear to be optimized for search engines, but a more thorough SEO analysis would help identify areas for improvement.\n",
+ "\n",
+ "Overall, the website provides a clear overview of Edward Donner's interests and expertise in AI and LLMs, but could benefit from some tweaks to improve accessibility, navigation, and CTAs.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# The call\n",
+ "summarizer_bot()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/day5-GitaScripting.ipynb b/week1/community-contributions/day5-GitaScripting.ipynb
new file mode 100644
index 0000000..964b183
--- /dev/null
+++ b/week1/community-contributions/day5-GitaScripting.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7759922b-12c9-44e0-8ac3-5f2a02b321d7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import fitz # PyMuPDF\n",
+ "import os\n",
+ "import requests\n",
+ "import json\n",
+ "from typing import List\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a266273a-05e3-451e-a318-428726cfa39c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize and constants\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
+ " print(\"API key looks good so far\")\n",
+ "else:\n",
+ " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
+ " \n",
+ "MODEL = 'gpt-4o-mini'\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "45566572-dd66-48dc-ab7b-6adbe26eacba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "exceptions = []"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "effc0e7b-d668-48b3-86d0-dbb5d8fe3d55",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Building system prompt\n",
+ "def get_verse_system_prompt():\n",
+ " system_prompt = \"You are a spiritual student who classifies the versus of the BhagavadGita according to a given theme.\\n\"\n",
+ " system_prompt += \"Given a theme, you should pick a verse from any chapter and give it's location in the form of index chapter.verse_number (6.2)\\n\"\n",
+ " system_prompt += \"You should respond in JSON as in this example:\\n\"\n",
+ " system_prompt += \"\"\"\n",
+ " {\"title\": \"Chapter 3, Verse 21 (3.21)\", \"verse\": \"เคเคฐเฅเคฎเคฃเคพ เคนเฅเคฏเคชเคฟ เคธเคเคธเคฟเคฆเฅเคงเคฟเคฎเฅโ\n",
+ " เคเคธเฅเคฅเคฟเคคเคพ เคเคจเคเคพเคฆเคฏ:เฅค\n",
+ " เคฒเฅเคเคธเคเคเฅเคฐเคนเคฎเฅเคตเคพเคชเคฟ\n",
+ " เคธเคฎเฅเคชเคถเฅเคฏเคจเฅเคเคฐเฅเคคเฅเคฎเคฐเฅเคนเคธเคฟเฅฅ\"}\n",
+ " \"\"\"\n",
+ " return system_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbfb1035-b183-4481-9b49-3cc1b12b42e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(get_verse_system_prompt())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6acdcd6c-1fc5-4c71-81d0-665e25808e46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define user prompt\n",
+ "def get_verse_user_prompt(theme):\n",
+ " user_prompt = f'''\n",
+ " Here is the theme : {theme},\n",
+ " Please find a verse from BhagavadGita excluding {exceptions} for a given theme {theme}\n",
+ " '''#excluding those results which are already used\n",
+ " \n",
+ " user_prompt += \"If the verse is not in the exceptions for a given theme and used for a different theme, you are free to suggest it for a different theme.\"\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "72f5c755-ec2d-4545-9a31-0f6b2e5ed4da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(get_verse_user_prompt('motivation'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "304d432c-7216-4a90-a5d8-db36b193657d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Call OpenAI to return verses\n",
+ "def get_verses(theme):\n",
+ " response = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": get_verse_system_prompt()},\n",
+ " {\"role\": \"user\", \"content\": get_verse_user_prompt(theme)}\n",
+ " ],\n",
+ " response_format={\"type\": \"json_object\"}\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " result = json.loads(result)\n",
+ "\n",
+ " #Remember those results which are suggested now\n",
+ " combination = (theme, result['title'])\n",
+ " exceptions.append(combination)\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b90eeb35-e10e-48ee-ade6-e0594da8c51b",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "print(get_verses('motivation'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b5b8925e-52e4-4cb7-9205-51c65ed88fb8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# So far we have fetched the new verses relevant to a given theme \n",
+ "# Let's generate a script for producing a YouTube video"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ff0862b-0310-4174-ad12-64047932dc9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#def function for system prompt\n",
+ "def get_script_system_prompt(tone, theme, format):\n",
+ " sys_prompt = 'You are a script writer for a youtube spiritual channel\\n'\n",
+ " sys_prompt += 'You are given a verse like below: \\n'\n",
+ " sys_prompt += str(get_verses(theme))\n",
+ " sys_prompt += '\\n'\n",
+ " sys_prompt += f'Give me an engaging script in a {tone} tone for a {format} format video for audience like youth seeking purpose, spiritual seekers, indians abroad, scholars and curious minds.'\n",
+ "\n",
+ " return sys_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47476516-cd2f-4b16-b378-a70617bbe284",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(get_script_system_prompt('Motivating','motivation','long'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e305525b-8dde-4e93-927a-e24531827498",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# def function for user prompt\n",
+ "def get_script_user_prompt(format, theme):\n",
+ " user_prompt = f'Given the verse, help me generate a detailed script suitable for {format} format video.\\n'\n",
+ " user_prompt += f'Please give me the complete verse, its meaning, a relevant story having a dilemma which the verse solves and the interpretation of the verse with respect to {theme}.\\n'\n",
+ " user_prompt += 'Let the script give cues about video editing, host actions.'\n",
+ " user_prompt += 'given the below example, please follow the format:\\n'\n",
+ " user_prompt += \"\"\"\n",
+ " [Opening Scene - Soft Instrumental Music Playing]\n",
+ "\n",
+ " [Cut to Host in a serene setting, perhaps by a river or in a lush green garden.]\n",
+ "\n",
+ " Host: (Smiling at the camera) \"Namaste, dear viewers! Welcome back to our channel, where we explore the depths of spirituality and seek to ignite the flame of wisdom within you. Today, we delve into a profound verse from the Bhagavad Gita that speaks to the very essence of life and identity.\"\n",
+ "\n",
+ " [Text On Screen: Chapter 2, Verse 13 (2.13)]\n",
+ "\n",
+ " Host: (With a sense of reverence) \"Letโs first take a moment to recite this verse together. It goes like this:\n",
+ "\n",
+ " เคฆเฅเคนเคฟเคจเฅเคฝเคธเฅเคฎเคฟเคจเฅเคจเฅเคฏเคฅเคพ เคฆเฅเคนเฅ เคเฅเคฎเคพเคฐเค เคฏเฅเคตเคจเค เคเคฐเคพเฅค\n",
+ " เคคเคฅเคพเคฆเฅเคนเคพเคจเฅเคคเคฐเคชเฅเคฐเคพเคชเฅเคคเคฟเคฐเฅเคงเฅเคฐเคธเฅเคคเคคเฅเคฐ เคจ เคฎเฅเคนเฅเคฏเคคเคฟเฅฅ\n",
+ "\n",
+ " Now, letโs understand the essence of this verse.\"\n",
+ "\n",
+ " [Cut to Graphic: Verse Translation with Key Concepts Highlighted]\n",
+ "\n",
+ " Host Voiceover: (Calm and engaging tone) \"The meaning of this beautiful verse translates to: 'Just as the body undergoes changes from childhood to old age, similarly, the soul transitions from one body to another. The wise, who understand this, are never bewildered by these changes.'\n",
+ "\n",
+ " [Cut back to Host]\n",
+ "\n",
+ " Host: (Nodding, creating a connection)\n",
+ " \"So, why is this verse so important, especially for us as young seekers of purpose? It highlights a profound truthโthat our identities are not confined by our physical forms or the stages of life we experience. Instead, we are eternal beings who are constantly evolving.\"\n",
+ "\n",
+ " [Scene Transition - Soft Music Playing]\n",
+ "\n",
+ " [Cut to a Story Animation - A young man named Arjun in a busy city]\n",
+ "\n",
+ " Host (Voiceover): \"Let me share a relatable story. Meet Arjun. Like many of us, he was once full of dreams and aspirations. He excelling in school, pursuing a career in engineering. But as the years passed, he faced a crossroads. As the pressure mounted, he began to question his identity.\n",
+ "\n",
+ " (Visuals show Arjun overwhelmed by societal expectations, with people pushing him in different directions.)\n",
+ "\n",
+ " He felt distinct phases of life pulling at him: childhood dreams, youthful ambitions, and the looming responsibilities of adulthood. The changing seasons of his life left him confused and wondering if he had lost his true self.\"\n",
+ "\n",
+ " [Cut back to Host, empathetic tone]\n",
+ "\n",
+ " Host: \"Have you ever felt like Arjun? Itโs a dilemma we all face, especially in today's fast-paced world where expectations can cloud our true identity. But just like our verse suggests, we should recognize that these changes donโt define us. They are simply part of the journey.\"\n",
+ "\n",
+ " [Scene Transition - Calm Music Playing while Host meditates]\n",
+ "\n",
+ " Host: (Speaking gently) \"Letโs take a moment to reflect. When we are sad, does that sadness define us? Or when we achieve success, do we become defined solely by that success? The answer isn't as straightforward as it seems. Hereโs the catch: our essence is beyond these transient states. Like the body, our identities are fluid.\"\n",
+ "\n",
+ " [Cut to Visuals of Nature - flowing rivers, trees shedding leaves, etc.]\n",
+ "\n",
+ " Host Voiceover: \"Imagine the endless cycle of natureโthe changing seasons, the growth, the decay, and rebirth. Just like the leaves that drop to make way for new growth, our experiences contribute to our spiritual evolution.\"\n",
+ "\n",
+ " [Cut back to Host - Inviting and Warm Tone]\n",
+ "\n",
+ " Host: \"Just as the wise who understand the transformation of the soul remain unshaken, we, too, can cultivate that wisdom to rise above the chaos of change. Recognize your true essenceโbeyond the body, the roles, the titles. Understand that your spirit is eternal.\"\n",
+ "\n",
+ " [Scene Transition - Soft Inspirational Music Begins]\n",
+ "\n",
+ " Host: (Passionately) \"So how can we embody this truth in our daily lives? Hereโs a small exercise: Each day, take a few moments to meditate on who you really are. Write down what aspects of your identity are tied to transient things. Challenge yourselfโwhat happens when you peel these layers away?\"\n",
+ "\n",
+ " [Cut to host with a pad, writing ideas]\n",
+ "\n",
+ " [Scene Transition - Editing Cues - Show engaging graphics of identity, layers of a person, etc.]\n",
+ "\n",
+ " Host Voiceover: \"Each effort towards understanding and embracing our true self draws us closer to the realization that we are eternal souls, having a human experience. This is the wisdom that can empower you to stand tall against the adversities of life.\"\n",
+ "\n",
+ " [Cut back to Host]\n",
+ "\n",
+ " Host: (Concluding) \"Thank you for joining me today in this exploration of Chapter 2, Verse 13 of the Bhagavad Gita. Remember, when you feel lost in the complexities of life, return to this teachings and remind yourself that you are not just a body; you are an eternal being on a magnificent journey.\n",
+ "\n",
+ " [Closing Scene - Uplifting Music Playing]\n",
+ "\n",
+ " Host: \"Donโt forget to like, share, and subscribe if you found resonance in this message. And share your thoughts in the comments below. What did you find most challenging in your own journey of self-identity? Letโs connect and support each other in our spiritual quests. Until next time, stay enlightened, stay inspired!\"\n",
+ "\n",
+ " [End Screen with Subscribe Button and Previous Video Suggestions]\n",
+ "\n",
+ " [End of Script]\n",
+ " \"\"\"\n",
+ " \n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4b29cb9-d8d1-413a-8152-4250e2430a42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(get_script_user_prompt('long','motivation'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1dfa60ce-9e88-4f7d-8e60-ac37a0aafc15",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_script(tone, theme, format):\n",
+ " response = openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": get_script_system_prompt(tone,theme,format)},\n",
+ " {\"role\": \"user\", \"content\": get_script_user_prompt(format,theme)}\n",
+ " ],\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " display(Markdown(result))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ec86c436-42ae-4313-b12f-4fad42ab2227",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "create_script('motivating','self-identity','long')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/training-summary-translation-length/jacquieAM/website-summary.ipynb b/week1/community-contributions/training-summary-translation-length/jacquieAM/website-summary.ipynb
new file mode 100644
index 0000000..9c31463
--- /dev/null
+++ b/week1/community-contributions/training-summary-translation-length/jacquieAM/website-summary.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "9ab446e4-219c-4589-aa8f-9386adcf5c60",
+   "metadata": {},
+   "source": [
+    "## Project Overview\n",
+    "This project combines web scraping with OpenAI’s GPT models to summarize online training content. It extracts material from Microsoft’s **Quantum Computing Fundamentals** learning path, cleans it, and generates concise summaries per lesson as well as an overall course summary.\n",
+    "\n",
+    "## Key Features\n",
+    "- Fetches and parses webpages using **requests** and **BeautifulSoup**\n",
+    "- Produces summaries in multiple languages (e.g., English, Spanish, or any language) and at varying levels of detail (short, medium, detailed)\n",
+    "- Summarizes individual lessons on demand or processes entire learning paths\n",
+    "- Presents results as clean, structured **Markdown** directly in the notebook\n",
+    "\n",
+    "## Tech Stack\n",
+    "- **Model**: GPT-4o-mini\n",
+    "- **Language**: Python\n",
+    "- **Libraries**: BeautifulSoup, OpenAI\n",
+    "\n",
+    "## Purpose\n",
+    "This project demonstrates how AI can streamline the understanding of technical documentation and online courses by generating multilingual, customizable summaries.\n"
+   ]
+  },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI\n",
+ "\n",
+ "# If you get an error running this cell, then please head over to the troubleshooting notebook!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables from .env file (not included)\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "# Check the key\n",
+ "\n",
+ "if not api_key:\n",
+ " print(\"No API key was found\")\n",
+ "elif not api_key.startswith(\"sk-proj-\"):\n",
+ " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key\")\n",
+ "elif api_key.strip() != api_key:\n",
+ " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them\")\n",
+ "else:\n",
+ " print(\"API key found and looks good so far!\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5e793b2-6775-426a-a139-4848291d0463",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "training_website = Website(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")\n",
+ "print(training_website.title)\n",
+ "print(training_website.text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a system prompt function that can use different language and length \n",
+ "\n",
+ "def build_system_prompt(language=\"Spanish\", length=\"short\"):\n",
+ " return f\"\"\"You are an assistant that analyzes the contents of a website and provides a {length} summary, ignoring text that might be navigation related.\n",
+ " Respond in 20 words or less markdown, and respond in {language}.\n",
+ " \"\"\"\n",
+ " \n",
+ " \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "987c95a6-6618-4d22-a2c3-3038a9d3f154",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a function that writes a User Prompt that asks for summaries of websites:\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary in {language} of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a846c89-81d8-4f48-9d62-7744d76694e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(user_prompt_for(training_website))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(user_prompt_for(training_website))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
+ "metadata": {},
+ "source": [
+ "## And now let's build useful messages for GPT-4o-mini, using a function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def messages_for(website, language=\"Spanish\", length=\"short\"):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": build_system_prompt(language, length)},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
+ "metadata": {},
+ "source": [
+ "## Time to bring it together - the API for OpenAI is very simple!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "425214b8-c5c5-4d7a-8b79-f9e151c9d54f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#call the OpenAI API. \n",
+ "\n",
+ "def summarize(url, language=\"Spanish\", length=\"short\"):\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=\"gpt-4o-mini\",\n",
+ " messages=messages_for(website, language, length)\n",
+ " )\n",
+ " return response.choices[0].message.content\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c437357-d004-49f5-95c3-fce38aefcb5c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Summarize all the lessons in microsoft quantum computer training, having the option to summarize by lesson, or the training as a whole\n",
+ "\n",
+ "def summarize_training(path_url, language=\"Spanish\", length=\"short\"):\n",
+ " links = get_links_from_path(path_url)\n",
+ " print(f\"Found {len(links)} lessons\")\n",
+ "\n",
+ " all_summaries = []\n",
+ "\n",
+ " for link in links:\n",
+ " print(f\"Summarizing {link}...\")\n",
+ " summary = summarize(link, language, length)\n",
+ " all_summaries.append(f\"### {link}\\n{summary}\\n\")\n",
+ "\n",
+ " combined_prompt = \"Here are summaries of each lesson:\\n\\n\" + \"\\n\".join(all_summaries)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=\"gpt-4o-mini\",\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": build_system_prompt(language, length)},\n",
+ " {\"role\": \"user\", \"content\": \"Please summarize the entire training path based on these lesson summaries:\\n\\n\" + combined_prompt}\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " return \"\\n\".join(all_summaries) + \"\\n\\n## General Course Summary\\n\" + response.choices[0].message.content\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "summarize(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d926d59-450e-4609-92ba-2d6f244f1342",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A function to display this nicely in the Jupyter output, using markdown\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3018853a-445f-41ff-9560-d925d1774b2f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/website-summarization-using-ollama.ipynb b/week1/community-contributions/website-summarization-using-ollama.ipynb
new file mode 100644
index 0000000..75edddb
--- /dev/null
+++ b/week1/community-contributions/website-summarization-using-ollama.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a68b1042-558a-4051-85e2-9ffd7a31a871",
+ "metadata": {},
+ "source": [
+ "# Website Summarization Using llama\n",
+ "### Week 1 Day 2 Exercise"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "176fcb2f-9ac7-460b-9fad-415e89c4920e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "b9c63761-c904-491b-92c7-e41eb319c3e4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Constants\n",
+ "\n",
+ "# OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
+ "# HEADERS = {\"Content-Type\": \"application/json\"}\n",
+ "MODEL = \"llama3.2\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "afe29712-751c-4322-a4c6-aed01e6acf26",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "be3eeb3f-aec5-4ef8-9427-3b80b2dce919",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
+ "and provides a short summary, ignoring text that might be navigation related. \\\n",
+ "Respond in markdown.\"\n",
+ "\n",
+ "\n",
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; \\\n",
+ "please provide a short summary of this website in markdown. \\\n",
+ "If it includes news or announcements, then summarize these too.\\n\\n\"\n",
+ " user_prompt += website.text\n",
+ " return user_prompt\n",
+ " \n",
+ "\n",
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
+ " ]\n",
+ "\n",
+ "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
+ "\n",
+ "def summarize(url):\n",
+ " website = Website(url)\n",
+ " response = ollama_via_openai.chat.completions.create(\n",
+ " model = MODEL,\n",
+ " messages = messages_for(website)\n",
+ " )\n",
+ " return response.choices[0].message.content\n",
+ "\n",
+ "\n",
+ "def display_summary(url):\n",
+ " summary = summarize(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a78b587d-3a75-45a8-9ac5-f78dcddfa822",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display_summary(\"https://cnn.com\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/week-1_exercise.ipynb b/week1/community-contributions/week-1_exercise.ipynb
new file mode 100644
index 0000000..5072bc1
--- /dev/null
+++ b/week1/community-contributions/week-1_exercise.ipynb
@@ -0,0 +1,337 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "64d2e4a0",
+ "metadata": {},
+ "source": [
+ "# End of Week 1 Exercise\n",
+ "\n",
+ "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question,\n",
+ "and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "e62b915e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from openai import OpenAI\n",
+ "import ollama\n",
+ "from dotenv import load_dotenv\n",
+ "import os\n",
+ "from IPython.display import display, update_display, Markdown"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8bdfc47a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "MODEL_GPT = 'gpt-4o-mini'\n",
+ "MODEL_LLAMA = 'llama3'\n",
+ "load_dotenv()\n",
+ "\n",
+ "api_key = os.getenv('OPENAI_API_KEY')\n",
+ "\n",
+ "openai=OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "57983d03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_messages(prompt=\"Describe some of the business applications of Generative AI\"):\n",
+ " \"\"\"Create properly formatted messages for API calls\"\"\"\n",
+ " messages = [\n",
+ " {\n",
+ " \"role\": \"system\",\n",
+ " \"content\": \"You are a helpful technical assistant that provides clear, detailed explanations for technical questions.\"\n",
+ " },\n",
+ " {\"role\": \"user\", \"content\": prompt}\n",
+ " ]\n",
+ " return messages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "a6bcb94d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def answer_with_openai(prompt=\"Describe some of the business applications of Generative AI\"):\n",
+ " \"\"\"Get answer using OpenAI API and print in stream\"\"\"\n",
+ " try:\n",
+ " messages = create_messages(prompt)\n",
+ " stream = openai.chat.completions.create(\n",
+ " model=MODEL_GPT,\n",
+ " messages=messages,\n",
+ " temperature=0.7,\n",
+ " stream=True\n",
+ " )\n",
+ " answer = \"\"\n",
+ " display_handle = display(Markdown(\"\"), display_id=True)\n",
+ " for chunk in stream:\n",
+ " if chunk.choices[0].delta.content:\n",
+ " answer += chunk.choices[0].delta.content\n",
+ " # Clean up markdown formatting for display\n",
+ " clean_answer = answer.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
+ " update_display(Markdown(clean_answer), display_id=display_handle.display_id)\n",
+ " return answer\n",
+ " except Exception as e:\n",
+ " return f\"Error with OpenAI: {str(e)}\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "e96159ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def answer_with_ollama(prompt=\"Describe some of the business applications of Generative AI\"):\n",
+ " \"\"\"Get answer using Ollama API and print in stream\"\"\"\n",
+ " try:\n",
+ " messages = create_messages(prompt)\n",
+ " stream = ollama.chat(\n",
+ " model=MODEL_LLAMA,\n",
+ " messages=messages,\n",
+ " stream=True\n",
+ " )\n",
+ " answer = \"\"\n",
+ " display_handle = display(Markdown(\"\"), display_id=True)\n",
+ " for chunk in stream:\n",
+ " if chunk['message']['content']:\n",
+ " answer += chunk['message']['content']\n",
+ " # Clean up markdown formatting for display\n",
+ " clean_answer = answer.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
+ " update_display(Markdown(clean_answer), display_id=display_handle.display_id)\n",
+ " return answer\n",
+ " except Exception as e:\n",
+ " return f\"Error with Ollama: {str(e)}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "ab72f8b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def technical_qa_tool(question, use_openai=True, use_ollama=True):\n",
+ " \"\"\"Main function to get technical explanations from both APIs\"\"\"\n",
+ " print(f\"Question: {question}\")\n",
+ " print(\"=\" * 80)\n",
+ " \n",
+ " if use_openai:\n",
+ " print(\"\\n๐ค OpenAI Response:\")\n",
+ " print(\"-\" * 40)\n",
+ " answer_with_openai(question)\n",
+ " \n",
+ " if use_ollama:\n",
+ " print(\"\\n๐ฆ Ollama Response:\")\n",
+ " print(\"-\" * 40)\n",
+ " answer_with_ollama(question)\n",
+ " # display(Markdown(ollama_answer))\n",
+ " \n",
+ " print(\"\\n\" + \"=\" * 80)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "1a6aa4a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Question: What is the difference between supervised and unsupervised machine learning?\n",
+ "================================================================================\n",
+ "\n",
+ "๐ค OpenAI Response:\n",
+ "----------------------------------------\n"
+ ]
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "Supervised and unsupervised machine learning are two primary categories of machine learning techniques, and they differ mainly in how they learn from data and the type of problems they are used to solve. Hereโs a detailed explanation of each:\n",
+ "\n",
+ "### Supervised Machine Learning\n",
+ "\n",
+ "**Definition**: In supervised learning, the model is trained on a labeled dataset, meaning that each training example is paired with an output label. The goal is to learn a mapping from inputs (features) to the output labels.\n",
+ "\n",
+ "**Characteristics**:\n",
+ "- **Labeled Data**: Requires a dataset that includes both the input features and the corresponding output labels.\n",
+ "- **Objective**: The objective is to predict the output for new, unseen data based on the learned mapping from the training data.\n",
+ "- **Common Techniques**:\n",
+ " - **Regression**: For predicting continuous values (e.g., predicting house prices).\n",
+ " - **Classification**: For predicting discrete labels (e.g., spam detection in emails).\n",
+ "- **Examples**:\n",
+ " - Predicting whether an email is spam or not based on various features (classification).\n",
+ " - Forecasting sales figures based on historical sales data (regression).\n",
+ "\n",
+ "### Unsupervised Machine Learning\n",
+ "\n",
+ "**Definition**: In unsupervised learning, the model is trained on data that is not labeled, meaning that it does not have predefined output labels. The goal is to discover patterns, groupings, or structures within the data.\n",
+ "\n",
+ "**Characteristics**:\n",
+ "- **Unlabeled Data**: Works with datasets that only have input features without any associated output labels.\n",
+ "- **Objective**: The objective is to explore the data and find hidden patterns or intrinsic structures without specific guidance.\n",
+ "- **Common Techniques**:\n",
+ " - **Clustering**: Grouping similar data points together (e.g., customer segmentation).\n",
+ " - **Dimensionality Reduction**: Reducing the number of features while retaining essential information (e.g., PCA - Principal Component Analysis).\n",
+ "- **Examples**:\n",
+ " - Grouping customers into segments based on purchasing behavior (clustering).\n",
+ " - Reducing the dimensionality of a dataset to visualize it in two or three dimensions (dimensionality reduction).\n",
+ "\n",
+ "### Key Differences\n",
+ "\n",
+ "1. **Data Type**:\n",
+ " - Supervised Learning: Requires labeled data.\n",
+ " - Unsupervised Learning: Works with unlabeled data.\n",
+ "\n",
+ "2. **Goal**:\n",
+ " - Supervised Learning: To learn a function that maps inputs to the correct outputs.\n",
+ " - Unsupervised Learning: To identify patterns or groupings in the input data.\n",
+ "\n",
+ "3. **Applications**:\n",
+ " - Supervised Learning: Typically used in scenarios where past data with known outcomes is available (e.g., fraud detection, image classification).\n",
+ " - Unsupervised Learning: Used for exploratory data analysis or when the outcome is not known (e.g., market basket analysis, anomaly detection).\n",
+ "\n",
+ "In summary, the primary difference between supervised and unsupervised machine learning lies in the presence or absence of labeled data and the objectives of the learning process. Supervised learning aims to predict outcomes based on existing labels, while unsupervised learning seeks to identify hidden structures in data without predefined labels."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "๐ฆ Ollama Response:\n",
+ "----------------------------------------\n"
+ ]
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "In machine learning, there are two main categories: supervised and unsupervised learning. The key difference lies in the type of data used to train the model and the goal of the learning process.\n",
+ "\n",
+ "**Supervised Learning**\n",
+ "\n",
+ "In supervised learning, you have a labeled dataset that contains both input data (features) and corresponding output labels or target variables. The goal is to learn a mapping between the input data and the output labels so that the model can make accurate predictions on new, unseen data.\n",
+ "\n",
+ "Here are some characteristics of supervised learning:\n",
+ "\n",
+ "1. Labeled training data: You have a dataset with input data and corresponding output labels.\n",
+ "2. Specific goal: You want to predict the output label for a given input instance.\n",
+ "3. Model evaluation: You evaluate the performance of your model using metrics like accuracy, precision, recall, F1 score, etc.\n",
+ "\n",
+ "Examples of supervised learning tasks include:\n",
+ "\n",
+ "* Image classification (e.g., recognizing dogs vs. cats)\n",
+ "* Sentiment analysis (e.g., determining if text is positive or negative)\n",
+ "* Regression problems (e.g., predicting house prices based on features like number of bedrooms and square footage)\n",
+ "\n",
+ "**Unsupervised Learning**\n",
+ "\n",
+ "In unsupervised learning, you have an unlabeled dataset, and the goal is to discover patterns, relationships, or structure in the data without a specific target variable. This type of learning is often used for exploratory data analysis, feature selection, and dimensionality reduction.\n",
+ "\n",
+ "Here are some characteristics of unsupervised learning:\n",
+ "\n",
+ "1. Unlabeled training data: You have a dataset with only input features (no output labels).\n",
+ "2. No specific goal: You want to find interesting patterns or structure in the data.\n",
+ "3. Model evaluation: You evaluate the performance of your model using metrics like silhouette score, Calinski-Harabasz index, etc.\n",
+ "\n",
+ "Examples of unsupervised learning tasks include:\n",
+ "\n",
+ "* Clustering (e.g., grouping customers based on their purchase history)\n",
+ "* Dimensionality reduction (e.g., reducing the number of features in a dataset while preserving important information)\n",
+ "* Anomaly detection (e.g., identifying unusual behavior or outliers in financial transactions)\n",
+ "\n",
+ "In summary, supervised learning involves training a model to make predictions based on labeled data, whereas unsupervised learning aims to discover patterns and relationships in unlabeled data."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "================================================================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the tool with a technical question\n",
+ "technical_question = \"What is the difference between supervised and unsupervised machine learning?\"\n",
+ "technical_qa_tool(technical_question)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a976ce1",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b0a539e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interactive version - uncomment to use\n",
+ "# user_question = input(\"Enter your technical question: \")\n",
+ "# technical_qa_tool(user_question)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/week1 EXERCISE - TechHelpAgent.ipynb b/week1/community-contributions/week1 EXERCISE - TechHelpAgent.ipynb
new file mode 100644
index 0000000..a750b2e
--- /dev/null
+++ b/week1/community-contributions/week1 EXERCISE - TechHelpAgent.ipynb
@@ -0,0 +1,206 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
+ "metadata": {},
+ "source": [
+ "# End of week 1 exercise\n",
+ "\n",
+ "To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
+ "and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "c1070317-3ed9-4659-abe3-828943230e03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from openai import OpenAI\n",
+ "import json\n",
+ "from IPython.display import Markdown, display, update_display\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# constants\n",
+ "\n",
+ "MODEL_GPT = 'gpt-4o-mini'\n",
+ "MODEL_LLAMA = 'llama3.2'\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "API key looks good so far\n"
+ ]
+ }
+ ],
+ "source": [
+ "# set up environment\n",
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+ "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
+ " print(\"API key looks good so far\")\n",
+ "else:\n",
+ " print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# here is the question; type over this to ask something new\n",
+ "system_prompt = \"You are a software engineering and data science expert and you have knowledge in all the areas of software engineering and latest technologies, trends. You should guide and help users with your technical solutions for all software engineering and data science related questions\"\n",
+ "user_prompt = \"\"\"\n",
+ "Please explain what this code does and why:\n",
+ "yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "This code snippet is utilizing a Python generator expression combined with the `yield from` statement to yield values from a set comprehension. Let's break it down:\n",
+ "\n",
+ "1. **Set Comprehension**:\n",
+ " ```python\n",
+ " {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+ " ```\n",
+ " - This is a set comprehension that iterates over a collection called `books`.\n",
+ " - For each `book`, it retrieves the value associated with the key `\"author\"` using the `get()` method.\n",
+ " - The `if book.get(\"author\")` condition ensures that only books that have a valid (non-None or non-empty) author are included. This effectively filters out any books where the author is not present.\n",
+ "\n",
+ " As a result, this part creates a set of unique authors from the list of books. Since sets automatically discard duplicates, if multiple books have the same author, that author will only appear once in the resulting set.\n",
+ "\n",
+ "2. **Yielding Values**:\n",
+ " ```python\n",
+ " yield from\n",
+ " ```\n",
+ " - The `yield from` statement is used when you want to yield all values from an iterable. It allows a generator to yield all values from another generator or iterable.\n",
+ " - In this context, it will yield each author from the set created by the comprehension.\n",
+ "\n",
+ "3. **Putting It All Together**:\n",
+ " What this overall code does is:\n",
+ " - It generates and yields unique authors from a collection of books, ensuring that each author is listed only once and only for books that actually specify an author.\n",
+ "\n",
+ "### Purpose:\n",
+ "This code is useful in scenarios where you need to obtain a seemingly infinite generator of authors from a collection of books, processing each author one by one without creating a permanent list or set in memory, which can be beneficial for memory efficiency especially if you have a very large collection of books.\n",
+ "\n",
+ "### Example Usage:\n",
+ "Hereโs a basic example of how you might use this in a generator function:\n",
+ "\n",
+ "```python\n",
+ "def get_unique_authors(books):\n",
+ " yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+ "\n",
+ "# Example books list\n",
+ "books = [\n",
+ " {\"title\": \"Book 1\", \"author\": \"Author A\"},\n",
+ " {\"title\": \"Book 2\", \"author\": \"Author B\"},\n",
+ " {\"title\": \"Book 3\", \"author\": \"Author A\"},\n",
+ " {\"title\": \"Book 4\", \"author\": None},\n",
+ "]\n",
+ "\n",
+ "for author in get_unique_authors(books):\n",
+ " print(author)\n",
+ "```\n",
+ "\n",
+ "This would output:\n",
+ "```\n",
+ "Author A\n",
+ "Author B\n",
+ "```\n",
+ "\n",
+ "In this example, `Author A` only appears once, demonstrating the uniqueness provided by the set comprehension."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get gpt-4o-mini to answer, with streaming\n",
+ "response = openai.chat.completions.create(\n",
+ " model=MODEL_GPT,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ "result = response.choices[0].message.content\n",
+ "print(display(Markdown(result)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get Llama 3.2 to answer"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/week1_assignments/scrape_website.py b/week1/community-contributions/week1_assignments/scrape_website.py
new file mode 100644
index 0000000..d040e22
--- /dev/null
+++ b/week1/community-contributions/week1_assignments/scrape_website.py
@@ -0,0 +1,15 @@
+from bs4 import BeautifulSoup
+import requests
+
+
+class ScrapeWebsite:
+
+ def __init__(self, url, headers):
+ """ Scraping Website which provides title and content"""
+ self.url = url
+ response = requests.get(self.url, headers=headers)
+ soup = BeautifulSoup(response.content, 'html.parser')
+ self.title = soup.title.string if soup.title else "No title found"
+ for irrelevant in soup.body(["script", "style", "img", "input"]):
+ irrelevant.decompose()
+ self.text = soup.body.get_text(separator="\n", strip=True)
\ No newline at end of file
diff --git a/week1/community-contributions/week1_assignments/text_summary_ollama.ipynb b/week1/community-contributions/week1_assignments/text_summary_ollama.ipynb
new file mode 100644
index 0000000..d7a5b3b
--- /dev/null
+++ b/week1/community-contributions/week1_assignments/text_summary_ollama.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI \n",
+ "from scrape_website import ScrapeWebsite"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Constants\n",
+ "MODEL = \"llama3.2\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "42c8a8c2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an analyst that analyses the content of the website \\\n",
+ " provides summary and ignore text related to navigation. Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "51e86dd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; Please provide short summary in Markdown. Please include news and \\\n",
+ " announcements\"\n",
+ " user_prompt+=website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b69d7238",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\":\"system\", \"content\": system_prompt},\n",
+ " {\"role\":\"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a56e99ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b4061d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def summarise(url):\n",
+ " website = ScrapeWebsite(url, headers)\n",
+ " ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
+ " response = ollama_via_openai.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages=messages_for(website)\n",
+ " )\n",
+ "\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65f96545",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_summary(url):\n",
+ " summary = summarise(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Generative AI has numerous business applications across various industries. Here are some examples:\n",
+ "\n",
+ "1. **Marketing and Advertising**: Generative AI can create personalized product recommendations, generate targeted advertisements, and develop new marketing campaigns.\n",
+ "2. **Content Creation**: AI-powered tools can assist in content creation, such as writing articles, generating social media posts, and creating videos, podcasts, and music.\n",
+ "3. **Product Design and Development**: Generative AI can aid in designing products, such as 3D modeling, prototyping, and testing product feasibility.\n",
+ "4. **Customer Service Chatbots**: AI-powered chatbots can provide personalized customer service, answering common queries, and helping resolve issues faster.\n",
+ "5. **Language Translation**: Generative AI can translate languages in real-time, enabling businesses to communicate with global customers more effectively.\n",
+ "6. **Data Analysis and Visualization**: AI can analyze large datasets, identify patterns, and create insights, making it easier for businesses to make informed decisions.\n",
+ "7. **Cybersecurity Threat Detection**: Generative AI-powered systems can detect and respond to cyber threats more efficiently, reducing the risk of data breaches and attacks.\n",
+ "8. **Supply Chain Optimization**: AI can optimize supply chain operations, predict demand, and identify opportunities for improvement, leading to increased efficiency and reduced costs.\n",
+ "9. **Network Security**: Generative AI can analyze network traffic patterns, detect anomalies, and prevent cyber-attacks.\n",
+ "10. **Finance and Banking**: AI-powered systems can detect financial fraud, predict customer creditworthiness, and generate credit reports.\n",
+ "\n",
+ "**Industry-specific applications:**\n",
+ "\n",
+ "1. **Healthcare**: AI can help with medical diagnosis, patient data analysis, and personalized medicine.\n",
+ "2. **Manufacturing**: Generative AI can create optimized production schedules, predict equipment failures, and improve product quality.\n",
+ "3. **Education**: AI-powered tools can develop personalized learning plans, automate grading, and provide educational resources.\n",
+ "4. **Real Estate**: AI can help with property valuations, identify market trends, and analyze potential clients' needs.\n",
+ "\n",
+ "**Business benefits:**\n",
+ "\n",
+ "1. **Increased efficiency**: Automating mundane tasks frees up human resources for more strategic work.\n",
+ "2. **Improved accuracy**: Generative AI reduces the likelihood of human error in decision-making and task execution.\n",
+ "3. **Enhanced customer experience**: Personalized experiences are created through data-driven insights.\n",
+ "4. **Competitive advantage**: Companies using AI can differentiate themselves from competitors by offering innovative services and products.\n",
+ "\n",
+ "As Generative AI continues to evolve, we can expect even more exciting applications across various industries, leading to increased efficiency, accuracy, and improved competitiveness for businesses worldwide.\n"
+ ]
+ }
+ ],
+ "source": [
+ "display_summary(\"https://www.firstpost.com/world/united-states/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/week1_assignments/text_summary_openai_gpt_5mini.ipynb b/week1/community-contributions/week1_assignments/text_summary_openai_gpt_5mini.ipynb
new file mode 100644
index 0000000..ab6c1a4
--- /dev/null
+++ b/week1/community-contributions/week1_assignments/text_summary_openai_gpt_5mini.ipynb
@@ -0,0 +1,265 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1e45263e",
+ "metadata": {},
+ "source": [
+ "# Web Data Extraction and Summarization using openAI Latest model gpt-5-mini"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df155151",
+ "metadata": {},
+ "source": [
+ "#### Import Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "588f8e43",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from IPython.display import Markdown, display\n",
+ "from openai import OpenAI \n",
+ "from scrape_website import ScrapeWebsite"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b5925769",
+ "metadata": {},
+ "source": [
+ "#### load api key"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "6cca85ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override=True)\n",
+ "api_key = os.getenv('OPENAI_API_KEY')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56703f80",
+ "metadata": {},
+ "source": [
+ "#### ScrapWebsite using BeautifulSoup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "3d60c909",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8b73c27",
+ "metadata": {},
+ "source": [
+ "#### System Prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "4a0c3bda",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an analyst that analyses the content of the website \\\n",
+ " provides summary and ignore text related to navigation. Respond in markdown.\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9117963b",
+ "metadata": {},
+ "source": [
+ "#### User Prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ab164d55",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_prompt_for(website):\n",
+ " user_prompt = f\"You are looking at a website titled {website.title}\"\n",
+ " user_prompt += \"\\nThe contents of this website is as follows; Please provide short summary in Markdown. Please include news and \\\n",
+ " announcements\"\n",
+ " user_prompt+=website.text\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de7423fb",
+ "metadata": {},
+ "source": [
+ "#### Format messages in openAI standard"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "47c82247",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def messages_for(website):\n",
+ " return [\n",
+ " {\"role\":\"system\", \"content\": system_prompt},\n",
+ " {\"role\":\"user\", \"content\": user_prompt_for(website)}\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6e9bb6e1",
+ "metadata": {},
+ "source": [
+ "#### Summarise the content in website using openAI latest model gpt-5-mini"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "068d6bb2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def summarise(url):\n",
+ " website = ScrapeWebsite(url, headers)\n",
+ " openai = OpenAI()\n",
+ " response = openai.chat.completions.create(model=\"gpt-5-mini\", messages=messages_for(website))\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7e6e9da6",
+ "metadata": {},
+ "source": [
+ "#### Show summary as Markdown"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "cd86c2ca",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_summary(url):\n",
+ " summary = summarise(url)\n",
+ " display(Markdown(summary))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed5e50d2",
+ "metadata": {},
+ "source": [
+ "#### Output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "74a056b1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "# Summary โ United States Of America | Firstpost (Live/Latest)\n",
+ "\n",
+ "Site focus: Live updates and rundowns of US and world news with emphasis on politics, justice, economy, national security, and breaking incidents. Coverage mixes headlines, investigations, opinion and special features/web stories.\n",
+ "\n",
+ "## Major news (headlines)\n",
+ "- Police shooting near CDC/Emory in Atlanta: a suspected shooter and a police officer were killed after reports of an active shooter near the CDC and Emory University campuses. \n",
+ "- Death of astronaut Jim Lovell (97): Apollo 13 commander and former Navy pilot died in a Chicago suburb. \n",
+ "- Stephen Miran named to Fed Board (short-term): Trump appointed economist Stephen Miran to the Federal Reserve Board through Jan 2026; noted for support of tariffs and rate cuts. \n",
+ "- Trump fires labour statistics chief: President Trump sacked the official overseeing labor data hours after a weak jobs report. \n",
+ "- House panel subpoenas Clintons over Epstein: congressional subpoenas seek documents in relation to Jeffrey Epstein amid pressure on the administration over Epstein files. \n",
+ "- Ghislaine Maxwell moved to lower-security prison in Texas amid scrutiny of Epstein files and government handling. \n",
+ "- FBI/administration tension on Epstein Files: Trump said he would โrelease everythingโ after reports the FBI redacted names from the Epstein Files. \n",
+ "- Probe launched into attorney who investigated Trump cases: US officials began a probe targeting Special Counsel Jack Smith. \n",
+ "- NTSB finds technical issues in Army helicopter crash: investigation into crash that killed 67 people identified technical problems. \n",
+ "- Trump unveils modified reciprocal tariffs: new executive order introduced modified tariffs on multiple countries; effective date possibly as late as Oct 5. \n",
+ "- Trump-EU trade deal announced: reported pact imposing a 15% tariff on most EU goods, with large energy and investment components but unresolved issues remain. \n",
+ "- Federal Reserve holds rates steady: Fed kept rates unchanged for a fifth meeting, despite political pressure from Trump. \n",
+ "- White House remodel plan: Trump pushing to build a reported $200 million ballroom at the presidential residence, funded by Trump/donors per WH. \n",
+ "- US citizenship test format under review: Trump administration considers reverting to the 2020 naturalisation test format, citing concerns the current test is too easy. \n",
+ "- American Airlines incident in Denver: passengers evacuated after a Boeing plane caught fire (tire/maintenance issue) before takeoff. \n",
+ "- John Bolton criticizes Tulsi Gabbard: former NSA lambastes Gabbardโs report on Obama as exaggerated and lacking substance. \n",
+ "- Ohio solicitor general Mathura Sridharan trolled: Indian-origin jurist faced racist online backlash after appointment; Ohio AG responded strongly.\n",
+ "\n",
+ "## Announcements, features & recurring elements\n",
+ "- Web stories and quick-read lists: travel/animals/safety themed pieces (e.g., โ10 airport codesโ, โ10 animals that are naturally blueโ, World Tiger Day lists). \n",
+ "- Regular sections and shows highlighted in coverage: Firstpost America, Firstpost Africa, First Sports, Vantage, Fast and Factual, Between The Lines, Flashback, Live TV. \n",
+ "- Events and special coverage teased: Raisina Dialogue, Champions Trophy, Delhi Elections 2025, Budget 2025, US Elections 2024, Firstpost Defence Summit. \n",
+ "- Trending topics emphasized: Donald Trump, Narendra Modi, Elon Musk, United States, Joe Biden. \n",
+ "- Quick-links / network: cross-promotion of other Network18 properties (News18, Moneycontrol, CNBC TV18, Forbes India).\n",
+ "\n",
+ "## Tone and emphasis\n",
+ "- Heavy focus on US politics, Trump administration actions and controversies (Epstein Files, tariffs, personnel changes), justice probes, national security incidents, and major breaking events.\n",
+ "- Mix of investigative/legal reporting, immediate breaking news, and light/web-story listicles.\n",
+ "\n",
+ "If you want, I can produce a one-page brief of just the Trump-related items, a timeline of the Epstein/Clinton/Subpoena coverage, or extract all headlines with publication order."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display_summary(\"https://www.firstpost.com/world/united-states/\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/youtube_video_summarize.ipynb b/week1/community-contributions/youtube_video_summarize.ipynb
new file mode 100644
index 0000000..c96714f
--- /dev/null
+++ b/week1/community-contributions/youtube_video_summarize.ipynb
@@ -0,0 +1,216 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "8ca2e60d-17c0-40fc-91c6-c16915b39c06",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re, html, json\n",
+ "import requests\n",
+ "from urllib.error import HTTPError\n",
+ "from openai import OpenAI\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable\n",
+ "\n",
+ "OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
+ "HEADERS = {\"Content-Type\": \"application/json\"}\n",
+ "MODEL = \"llama3.2\"\n",
+ "api_key='ollama'\n",
+ "\n",
+ "def yt_title_desc_transcript(url: str, lang=\"en\"):\n",
+ " \"\"\"\n",
+ " Returns {\"title\": str|None, \"description\": str|None, \"transcript\": str|None}.\n",
+ " - Title via oEmbed (no API key).\n",
+ " - Description scraped from the watch page (shortDescription).\n",
+ " - Transcript via youtube-transcript-api, gracefully handling 400/disabled.\n",
+ " \"\"\"\n",
+ " # --- extract 11-char video id ---\n",
+ " m = re.search(r\"(?:v=|/)([0-9A-Za-z_-]{11})|^([0-9A-Za-z_-]{11})$\", url)\n",
+ " vid = (m.group(1) or m.group(2)) if m else None\n",
+ " if not vid:\n",
+ " return {\"title\": None, \"description\": None, \"transcript\": None}\n",
+ "\n",
+ " # --- title via oEmbed (very robust) ---\n",
+ " title = None\n",
+ " try:\n",
+ " r = requests.get(\"https://www.youtube.com/oembed\",\n",
+ " params={\"url\": f\"https://www.youtube.com/watch?v={vid}\", \"format\": \"json\"},\n",
+ " timeout=10)\n",
+ " if r.ok:\n",
+ " title = r.json().get(\"title\")\n",
+ " except Exception:\n",
+ " pass\n",
+ "\n",
+ " # --- description from watch page (shortDescription in initial JSON) ---\n",
+ " description = None\n",
+ " try:\n",
+ " page = requests.get(f\"https://www.youtube.com/watch?v={vid}\", timeout=10).text\n",
+ " # Look for ytInitialPlayerResponse JSON\n",
+ " jmatch = re.search(r\"ytInitialPlayerResponse\\s*=\\s*({.*?});\", page, re.DOTALL)\n",
+ " if jmatch:\n",
+ " data = json.loads(jmatch.group(1))\n",
+ " desc = data.get(\"videoDetails\", {}).get(\"shortDescription\")\n",
+ " if desc:\n",
+ " description = html.unescape(desc)\n",
+ " except Exception:\n",
+ " pass\n",
+ "\n",
+ " # --- transcript (handle 400 cleanly) ---\n",
+ " transcript_text = None\n",
+ " try:\n",
+ " items = YouTubeTranscriptApi.get_transcript(vid, languages=[lang])\n",
+ " transcript_text = \" \".join(ch[\"text\"].strip() for ch in items if ch.get(\"text\"))\n",
+ " except (NoTranscriptFound, TranscriptsDisabled, VideoUnavailable, HTTPError):\n",
+ " # HTTPError covers the \"HTTP Error 400: Bad Request\" case\n",
+ " transcript_text = None\n",
+ " except Exception:\n",
+ " transcript_text = None\n",
+ "\n",
+ " return {\"title\": title, \"description\": description, \"transcript\": transcript_text}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "ad9be496-4e91-4562-90f3-54d11208da55",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "system_prompt = '''\n",
+ "You are an assistant that generates detailed yet concise summaries of YouTube videos.\n",
+ "When the user provides a title and description of a YouTube video, your task is to write a coherent, engaging, and informative summary of around 500 words.\n",
+ "The summary should:\n",
+ "\n",
+ "Capture the main themes and key points the video likely covers.\n",
+ "\n",
+ "Expand on the description logically, providing context and flow.\n",
+ "\n",
+ "Stay neutral, factual, and clear (no personal opinions).\n",
+ "\n",
+ "Be self-contained so it makes sense without needing to watch the video.\n",
+ "'''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "dd4be0bc-df1f-47e0-9e03-9b734117f80a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def user_prompt(title, description):\n",
+ " prompt = '''Provide me the YouTube video title and description.\\n\n",
+ " I will generate a clear, engaging, and concise summary of the video content in around 500 words,\\n\n",
+ " highlighting the main ideas, key points, and important details.\\n'''\n",
+ " prompt += f'here is the title : {title} \\n Description : {description} '\n",
+ " return prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "46896ad3-db1e-448a-8a03-036b9568c69f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_youtube(yt_url):\n",
+ " ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
+ " video_metadata = yt_title_desc_transcript(yt_url)\n",
+ " stream = ollama.chat.completions.create(\n",
+ " model=MODEL,\n",
+ " messages = [\n",
+ " {\"role\":\"system\", \"content\": system_prompt},\n",
+ " {\"role\":\"user\", \"content\": user_prompt(video_metadata['title'], video_metadata['description'])}\n",
+ " ],\n",
+ " stream=True\n",
+ " \n",
+ " )\n",
+ " response = \"\"\n",
+ " display_handle = display(Markdown(\"\"), display_id=True)\n",
+ " for chunk in stream:\n",
+ " response += chunk.choices[0].delta.content or ''\n",
+ " response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
+ " update_display(Markdown(response), display_id=display_handle.display_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "b59f8773-c13e-4050-ad3c-b578d07ef5e7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "Here is a summary of the YouTube video:\n",
+ "\n",
+ "**Monta Re: A Baul-Inspired Tribute to the Mystic Guru Shankaracharya**\n",
+ "\n",
+ "The music video for \"Monta Re\" by Amit Trivedi, featuring Swanand Kirkire and Amitabh Bhattacharya, is a soulful tribute to the mystic guru Shankaracharya. Set in the Bengali folk music tradition, this song brings to life the ancient tales of Shankaracharya's spiritual journey.\n",
+ "\n",
+ "With elegant lyrics penned by Amitabh Bhattacharya, \"Monta Re\" transports listeners to the banks of the Ganges River, where Shankaracharya wandered in search of wisdom and inner peace. The song's haunting melodies and emotive vocals evoke a sense of longing and introspection, perfectly capturing the mystic guru's spiritual essence.\n",
+ "\n",
+ "The music video beautifully illustrates the baul-inspired style, with intricate traditional dance movements performed by a group of energetic dancers. The choreography seamlessly blends elements of Bengal's folk heritage with modern sensibilities, making the song an engaging watch for audience members interested in Indian classical music.\n",
+ "\n",
+ "**Music and Lyric Credit:**\n",
+ "Amit Trivedi handles the music composition, ensuring that the melody complements the song's themes without overpowering them. Amitabh Bhattacharya takes credit for the lyrics, which tell stunning stories of Shankaracharya's spiritual adventures. The song features Swanand Kirkire and Amitabh Bhattacharya as vocalists, further enriching its emotional impact.\n",
+ "\n",
+ "**Relevance to Bengali Culture:**\n",
+ "\"Monta Re\" is a heartwarming tribute to Bengal's rich cultural heritage. Inspired by the baul traditions of the region, this song honors Shankaracharya's life and spiritual journey without diminishing his significance in modern times. By showcasing these folk roots, \"Monta Re\" provides fans with an enriching sensory experience.\n",
+ "\n",
+ "You can listen to \"Monta Re\" along with other T-Series music videos released by Amit Trivedi at the links provided below:\n",
+ "\n",
+ "- Watch \"Ankahee\"\n",
+ "- Check out \"Sawaar Loon\"\n",
+ "- Explore \"Zinda Hoon\"\n",
+ "\n",
+ "Follow the official T-SERIES YouTube channel for an ever-growing variety of original music tracks!\n",
+ "\n",
+ "By embracing the richness of Bengali folk traditions, \"Monta Re\" embodies a musical reflection of Shankaracharya's extraordinary journey as both spiritual guide and symbol of timeless wisdom."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "stream_youtube('https://youtu.be/99NUJ1cLbBI?list=RDdJ6_aU6auZc')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "649287ca-aff8-4b59-91b7-731c007e83a7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/AddingGeminiToDropdown.ipynb b/week2/community-contributions/AddingGeminiToDropdown.ipynb
new file mode 100644
index 0000000..656a542
--- /dev/null
+++ b/week2/community-contributions/AddingGeminiToDropdown.ipynb
@@ -0,0 +1,223 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "057bc09f-a682-4b72-97ed-c69ddef3f03e",
+ "metadata": {},
+ "source": [
+ "# Gemini to Dropdown"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d66eb067-7bae-4145-b613-6da2f40fbf27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "from typing import List\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import google.generativeai as genai\n",
+ "import anthropic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e36f8a93-8a65-48f2-bcad-7c47dd72ef3a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import gradio as gr "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a5ec1b0-f5b4-46d2-abb0-b28b73cc4d28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26d0099c-890f-4358-8c1d-7a708abcb105",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "openai = OpenAI()\n",
+ "\n",
+ "claude = anthropic.Anthropic()\n",
+ "\n",
+ "google.generativeai.configure()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6606bfdb-964e-4d6f-b2a1-5017b99aa23d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"You are a helpful assistant\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e0cfb96a-2dbe-4228-8efb-75947dbc3228",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_gpt(prompt):\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": prompt}\n",
+ " ]\n",
+ " stream = openai.chat.completions.create(\n",
+ " model='gpt-4o-mini',\n",
+ " messages=messages,\n",
+ " stream=True\n",
+ " )\n",
+ " result = \"\"\n",
+ " for chunk in stream:\n",
+ " result += chunk.choices[0].delta.content or \"\"\n",
+ " yield result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9008a15d-0ee8-44e0-b123-225e7148113e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_claude(prompt):\n",
+ " result = claude.messages.stream(\n",
+ " model=\"claude-3-haiku-20240307\",\n",
+ " max_tokens=1000,\n",
+ " temperature=0.7,\n",
+ " system=system_message,\n",
+ " messages=[\n",
+ " {\"role\": \"user\", \"content\": prompt},\n",
+ " ],\n",
+ " )\n",
+ " response = \"\"\n",
+ " with result as stream:\n",
+ " for text in stream.text_stream:\n",
+ " response += text or \"\"\n",
+ " yield response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "378ad12e-6645-4647-807c-00995e360268",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_gemini(prompt):\n",
+ " gemini = genai.GenerativeModel(\n",
+ " model_name=\"gemini-2.0-flash\",\n",
+ " system_instruction=system_message\n",
+ " )\n",
+ " \n",
+ " stream = gemini.generate_content(prompt, stream=True)\n",
+ " \n",
+ " result = \"\"\n",
+ " for chunk in stream:\n",
+ " try:\n",
+ " part = chunk.text\n",
+ " if part:\n",
+ " result += part\n",
+ " yield result \n",
+ " except Exception as e:\n",
+ " print(\"Chunk error:\", e)\n",
+ " \n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd50e143-eead-49b1-8ea3-b440becd4bc9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_model(prompt, model):\n",
+ " if model==\"GPT\":\n",
+ " result = stream_gpt(prompt)\n",
+ " elif model==\"Claude\":\n",
+ " result = stream_claude(prompt)\n",
+ " elif model==\"Gemini\":\n",
+ " result = stream_gemini(prompt)\n",
+ " else:\n",
+ " raise ValueError(\"Unknown model\")\n",
+ " yield from result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c7fc9cb4-fbb8-4301-86a6-96c90f67eb3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "view = gr.Interface(\n",
+ " fn=stream_model,\n",
+ " inputs=[gr.Textbox(label=\"Your message:\"), gr.Dropdown([\"GPT\", \"Claude\",\"Gemini\"], label=\"Select model\", value=\"GPT\")],\n",
+ " outputs=[gr.Markdown(label=\"Response:\")],\n",
+ " flagging_mode=\"never\"\n",
+ ")\n",
+ "view.launch()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/Copilot.ipynb b/week2/community-contributions/Copilot.ipynb
new file mode 100644
index 0000000..c32aad0
--- /dev/null
+++ b/week2/community-contributions/Copilot.ipynb
@@ -0,0 +1,212 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1877ad68",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr\n",
+ "from dotenv import load_dotenv \n",
+ "import google.generativeai as genai\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "load_dotenv(override=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "008056a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f'OpenAi api key exists and its starts with {openai_api_key[:3]}')\n",
+ "else:\n",
+ " print(\"OpenAi api key doesn't exist\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print('Google api key exists')\n",
+ "else:\n",
+ " print(\"Google api key doesn't exist\")\n",
+ "\n",
+ "OPENAI_MODEL = \"gpt-4o-mini\"\n",
+ "GOOGLE_MODEL = \"gemini-1.5-flash\"\n",
+ "\n",
+ "openai = OpenAI()\n",
+ "\n",
+ "genai.configure()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5013ed7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_msg = \"\"\"\n",
+ "You are CodeCopilot, an adaptive AI coding assistant that helps users solve problems in any programming language.\n",
+ "Always provide correct, runnable, and well-formatted code with clear explanations.\n",
+ "Adjust your style based on the user's expertise: for beginners, break concepts down step by step with simple examples and commented code;\n",
+ "for advanced users, deliver concise, production-ready, optimized solutions with best practices and trade-off insights.\n",
+ "Ask clarifying questions when requirements are ambiguous, highlight pitfalls and edge cases,\n",
+ "and act as a collaborative pair programmer or mentor whose goal is to help users learn, build, and ship high-quality code efficiently.\n",
+ "\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35c480a1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_prompt(prompt, history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_msg}]\n",
+ "\n",
+ " # history is a list of (user_msg, assistant_msg) tuples\n",
+ " for user_msg, assistant_msg in history:\n",
+ " if user_msg:\n",
+ " messages.append({\"role\": \"user\", \"content\": user_msg})\n",
+ " if assistant_msg:\n",
+ " messages.append({\"role\": \"assistant\", \"content\": assistant_msg})\n",
+ "\n",
+ " # new user prompt\n",
+ " messages.append({\"role\": \"user\", \"content\": prompt})\n",
+ " return messages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5dfbecd0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def openai_agent(prompt, history):\n",
+ " openai.api_key = openai_api_key\n",
+ " messages = create_prompt(prompt, history)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=OPENAI_MODEL,\n",
+ " messages=messages,\n",
+ " stream=True\n",
+ " )\n",
+ " sent_any = False\n",
+ " for chunk in response:\n",
+ " delta = chunk.choices[0].delta\n",
+ " if delta and delta.content:\n",
+ " sent_any = True\n",
+ " yield delta.content\n",
+ " if not sent_any:\n",
+ " yield \"(no response)\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "535f7e3d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def gemini_agent(prompt, history):\n",
+ " genai.configure(api_key=google_api_key)\n",
+ "\n",
+ " # reuse OpenAI-style messages\n",
+ " messages = create_prompt(prompt, history)\n",
+ "\n",
+ " gemini_history = []\n",
+ " for m in messages:\n",
+ " # Gemini does NOT support system role\n",
+ " if m[\"role\"] == \"system\":\n",
+ " continue\n",
+ " gemini_history.append({\n",
+ " \"role\": m[\"role\"],\n",
+ " \"parts\": [m[\"content\"]]\n",
+ " })\n",
+ " prompt_with_system = f\"{system_msg}\\n\\n{prompt}\"\n",
+ " model = genai.GenerativeModel(GOOGLE_MODEL)\n",
+ " chat = model.start_chat(history=gemini_history)\n",
+ "\n",
+ " response = chat.send_message(prompt_with_system, stream=True)\n",
+ " for chunk in response:\n",
+ " if chunk and getattr(chunk, \"text\", None):\n",
+ " yield chunk.text\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21f61ff0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat_agent(prompt, history, modelType):\n",
+ " if modelType == \"OpenAI\":\n",
+ " for token in openai_agent(prompt, history):\n",
+ " yield token\n",
+ " else:\n",
+ " for token in gemini_agent(prompt, history):\n",
+ " yield token\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "56686c1d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat_fn(prompt, history, model):\n",
+ " assistant_response = \"\"\n",
+ " for token in chat_agent(prompt, history, model):\n",
+ " assistant_response += token\n",
+ " yield assistant_response \n",
+ "\n",
+ "# -------------------------------------------------------------------\n",
+ "# UI\n",
+ "# -------------------------------------------------------------------\n",
+ "with gr.Blocks() as demo:\n",
+ " model_choice = gr.Radio([\"OpenAI\", \"Gemini\"], value=\"OpenAI\", label=\"Model\")\n",
+ "\n",
+ " chat_ui = gr.ChatInterface(\n",
+ " fn=chat_fn,\n",
+ " additional_inputs=[model_choice],\n",
+ " title=\"CodeCopilot\",\n",
+ " description=\"An adaptive AI coding assistant that helps developers build and ship high-quality code.\"\n",
+ " )\n",
+ "\n",
+ "demo.launch()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/HistoryBot-Week2Exercise.ipynb b/week2/community-contributions/HistoryBot-Week2Exercise.ipynb
new file mode 100644
index 0000000..58d1728
--- /dev/null
+++ b/week2/community-contributions/HistoryBot-Week2Exercise.ipynb
@@ -0,0 +1,398 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "6a193ef7-41df-42cb-ab35-fb5fa77a78b9",
+ "metadata": {},
+ "source": [
+ "HelloHistory- Learn History On the Go"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "d813bcdb-fbff-43f8-97ae-28cf1ec2e094",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
+ "\n",
+ "#This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
+ "\n",
+ "#If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "ace8fd2d-341e-451d-a70e-82fac828299c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import anthropic\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "import gradio as gr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "591b90d1-9771-40ae-ad99-6e864465a358",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "OpenAI API Key exists and begins sk\n",
+ "Anthropic API Key exists and begins sk\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "# Print the key prefixes to help with any debugging\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "#google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:2]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:2]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "eace2872-0ddb-4b86-ae09-91ad9fc2dd04",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#connect to models\n",
+ "\n",
+ "openai = OpenAI()\n",
+ "\n",
+ "claude = anthropic.Anthropic()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d5e99852-89f7-41da-84a5-5cf8659faddc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"You are a helpful tutor teaching people history. You have to answer their questions on historical events.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "01d3fcb8-967e-4841-809a-b428e80c17c9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#test function\n",
+ "\n",
+ "def message_gpt(prompt):\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": prompt}\n",
+ " ]\n",
+ " completion = openai.chat.completions.create(\n",
+ " model='gpt-4o-mini',\n",
+ " messages=messages,\n",
+ " )\n",
+ " return completion.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a214da55-644f-4469-8167-1b317a7cb8ce",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'World War II was won by the Allies, a coalition of countries that included the United States, the Soviet Union, the United Kingdom, China, and several other nations. The war officially ended in 1945, with the unconditional surrender of Nazi Germany in May and the surrender of Japan in September.'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "message_gpt(\"Who won World War II?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "65b9c8ac-1319-46ed-a5d8-82d98cb3d831",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"World War II began on September 1, 1939, when Germany, led by Adolf Hitler, invaded Poland. This invasion was a result of aggressive expansionist policies pursued by Nazi Germany throughout the 1930s, which included the annexation of Austria and the incorporation of Czechoslovakia's Sudetenland.\\n\\nThe invasion of Poland prompted Britain and France to declare war on Germany on September 3, 1939, fulfilling their commitments to support Poland. Tensions had been building in Europe due to unresolved issues from World War I, the rise of totalitarian regimes, and various treaties and alliances. The war would expand rapidly as other nations became involved, eventually leading to a global conflict that lasted until 1945.\""
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "message_gpt(\"How did World War II begin?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "41618892-8a7b-4871-9d8e-d030fabf1046",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#add streaming \n",
+ "def stream_gpt(prompt):\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": prompt}\n",
+ " ]\n",
+ " stream = openai.chat.completions.create(\n",
+ " model='gpt-4o-mini',\n",
+ " messages=messages,\n",
+ " stream=True\n",
+ " )\n",
+ " result = \"\"\n",
+ " for chunk in stream:\n",
+ " result += chunk.choices[0].delta.content or \"\"\n",
+ " yield result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ad6edb2a-9c2c-4b53-bead-6009f493f281",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7861\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "view = gr.Interface(\n",
+ " fn=stream_gpt,\n",
+ " inputs=[gr.Textbox(label=\"Ask HistoryBot a question:\")],\n",
+ " outputs=[gr.Markdown(label=\"Response:\")],\n",
+ " flagging_mode=\"never\"\n",
+ ")\n",
+ "view.launch()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "54be6b30-db25-49b9-aecb-e63daa0b6873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#streaming with Claude\n",
+ "\n",
+ "def stream_claude(prompt):\n",
+ " result = claude.messages.stream(\n",
+ " model=\"claude-3-haiku-20240307\",\n",
+ " max_tokens=1000,\n",
+ " system=system_message,\n",
+ " messages=[\n",
+ " {\"role\": \"user\", \"content\": prompt},\n",
+ " ],\n",
+ " )\n",
+ " response = \"\"\n",
+ " with result as stream:\n",
+ " for text in stream.text_stream:\n",
+ " response += text or \"\"\n",
+ " yield response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "62d94a31-1b2b-4266-8cfa-16e877240aa8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7862\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "view = gr.Interface(\n",
+ " fn=stream_claude,\n",
+ " inputs=[gr.Textbox(label=\"Ask HistoryBot a question:\")],\n",
+ " outputs=[gr.Markdown(label=\"Response:\")],\n",
+ " flagging_mode=\"never\"\n",
+ ")\n",
+ "view.launch()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "b69fdbe0-abc7-429a-aadd-44620035f49e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# function to select model\n",
+ "\n",
+ "def stream_model(prompt, model):\n",
+ " if model==\"GPT\":\n",
+ " result = stream_gpt(prompt)\n",
+ " elif model==\"Claude\":\n",
+ " result = stream_claude(prompt)\n",
+ " else:\n",
+ " raise ValueError(\"Unknown model\")\n",
+ " yield from result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "40b2961a-2bb2-4a55-9abb-be967e184db9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7864\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "view = gr.Interface(\n",
+ " fn=stream_model,\n",
+ " inputs=[gr.Textbox(label=\"Ask HistoryBot a question\"), gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")],\n",
+ " outputs=[gr.Markdown(label=\"Response:\")],\n",
+ " flagging_mode=\"never\"\n",
+ ")\n",
+ "view.launch()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e8b78f97-597a-4d4a-8f7a-c6e982290596",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/Mediterranean Banter.ipynb b/week2/community-contributions/Mediterranean Banter.ipynb
new file mode 100644
index 0000000..5ac089c
--- /dev/null
+++ b/week2/community-contributions/Mediterranean Banter.ipynb
@@ -0,0 +1,252 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9ab6f493-026f-4950-b244-22c5251b8daa",
+ "metadata": {},
+ "source": [
+ "# Mediterranean Banter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4568bc63-679e-4ea1-a9c9-b85dfc386ec7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import anthropic\n",
+ "from IPython.display import Markdown, display, update_display"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a852bee-76e6-4538-89a3-0702c2d5f05c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import google.generativeai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c843ac74-4797-4bd0-bed2-dfcaa2f98c41",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e4181018-1818-47fa-a3fb-554627a63f69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "\n",
+ "claude = anthropic.Anthropic()\n",
+ "\n",
+ "google.generativeai.configure()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19f833c8-e183-469c-a8c8-1c014889a15d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_model_spain = \"gpt-4.1-mini\"\n",
+ "claude_model_italy = \"claude-3-5-haiku-latest\"\n",
+ "gemini_model_france = \"gemini-2.0-flash\"\n",
+ "\n",
+ "gpt_system_spain = \"You are a chatbot who is very argumentative; \\\n",
+ "you disagree with anything other than how beautiful and great South of Spain is.\"\n",
+ "\n",
+ "claude_system_italy = \"You are a very polite, courteous chatbot but you favour south of Italy praising Amalfi Coast.\"\n",
+ "\n",
+ "gemini_system_france = \"You are neutral. You always accept what other people say and never try to put down other people thoughts.Although, you politely try to include that South of France is beautiful\"\n",
+ "\n",
+ "gpt_messages_spain = [\"Hi there, nothing beats the beauty of Spain and its wonderful beaches.\"]\n",
+ "claude_messages_italy = [\"I agree. I admire the Southern Part of Spain but its not as pretty as Amalfi Coast.\"]\n",
+ "gemini_messages_france = [\"Well, both are good and so is the French Riveria.\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2d426b95-5e7c-49aa-a5a1-9613296eb0d0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def call_gpt():\n",
+ " messages = [{\"role\": \"system\", \"content\": gpt_system_spain}]\n",
+ " for gpt, claude,gemini in zip(gpt_messages_spain, claude_messages_italy,gemini_messages_france):\n",
+ " messages.append({\"role\": \"assistant\", \"content\": gpt})\n",
+ " messages.append({\"role\": \"user\", \"content\": claude})\n",
+ " messages.append({\"role\": \"user\", \"content\": gemini})\n",
+ " completion = openai.chat.completions.create(\n",
+ " model=gpt_model_spain,\n",
+ " messages=messages\n",
+ " )\n",
+ " return completion.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3fc9a696-3145-4f37-873b-539647f2fc0b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "call_gpt()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "63910faa-a122-4261-82a0-7530c6c5749a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def call_claude():\n",
+ " messages = []\n",
+ " for gpt_spain, claude_italy,gemini_france in zip(gpt_messages_spain, claude_messages_italy,gemini_messages_france):\n",
+ " messages.append({\"role\": \"user\", \"content\": gpt_spain})\n",
+ " messages.append({\"role\": \"user\", \"content\": gemini_france})\n",
+ " messages.append({\"role\": \"assistant\", \"content\": claude_italy})\n",
+ " messages.append({\"role\": \"user\", \"content\": gpt_messages_spain[-1]})\n",
+ " message = claude.messages.create(\n",
+ " model=claude_model_italy,\n",
+ " system=claude_system_italy,\n",
+ " messages=messages,\n",
+ " max_tokens=500\n",
+ " )\n",
+ " return message.content[0].text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d3ab6aa2-a462-4fb3-bb6a-dc6b971827fa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "call_claude()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "114cb7eb-0915-46ac-b285-e40acf4a9ffb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def call_gemini():\n",
+ " messages=[]\n",
+ " for gpt_spain, claude_italy,gemini_france in zip(gpt_messages_spain, claude_messages_italy,gemini_messages_france):\n",
+ " messages.append({\"role\": \"user\", \"content\": gpt_spain})\n",
+ " messages.append({\"role\": \"user\", \"content\": claude_italy})\n",
+ " messages.append({\"role\": \"assistant\", \"content\": gemini_france})\n",
+ " messages.append({\"role\": \"user\", \"content\": gpt_messages_spain[-1]})\n",
+ " messages.append({\"role\": \"user\", \"content\": claude_messages_italy[-1]})\n",
+ " gemini = google.generativeai.GenerativeModel(\n",
+ " model_name='gemini-2.0-flash',\n",
+ " system_instruction=gemini_system_france\n",
+ " )\n",
+ " dialogue_text = \"\\n\".join(f\"{m['role']}: {m['content']}\" for m in messages)\n",
+ " response = gemini.generate_content(dialogue_text)\n",
+ " return response.text\n",
+ " \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3acf708-f9b1-4a6d-b3e1-823c96d00555",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "call_gemini()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c103430e-68c7-4cc6-8a43-6b5aec7fdc96",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_messages_spain = [\"Hi there, nothing beats the beauty of Spain and its wonderful beaches.\"]\n",
+ "claude_messages_italy = [\"I agree. I admire the Southern Part of Spain but its not as pretty as Amalfi Coast.\"]\n",
+ "gemini_messages_france = [\"Well, both are good and so is the French Riveria.\"]\n",
+ "\n",
+ "print(f\"GPT:\\n{gpt_messages_spain[0]}\\n\")\n",
+ "print(f\"Claude:\\n{claude_messages_italy[0]}\\n\")\n",
+ "print(f\"Gemini:\\n{gemini_messages_france[0]}\\n\")\n",
+ "\n",
+ "for i in range(5):\n",
+ " gpt_next = call_gpt()\n",
+ " print(f\"GPT:\\n{gpt_next}\\n\")\n",
+ " gpt_messages_spain.append(gpt_next)\n",
+ " \n",
+ " claude_next = call_claude()\n",
+ " print(f\"Claude:\\n{claude_next}\\n\")\n",
+ " claude_messages_italy.append(claude_next)\n",
+ "\n",
+ " gemini_next = call_gemini()\n",
+ " print(f\"Gemini:\\n{gemini_next}\\n\")\n",
+ " gemini_messages_france.append(gemini_next)\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/SushiRestaurant.ipynb b/week2/community-contributions/SushiRestaurant.ipynb
new file mode 100644
index 0000000..ad32c65
--- /dev/null
+++ b/week2/community-contributions/SushiRestaurant.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "757905af-7f93-4dca-9526-063bc93a78c7",
+ "metadata": {},
+ "source": [
+ "# Sakana-ya (魚屋) Sushi\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9a6721fb-efca-4412-a0a7-cc8e6c4ced76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0fa458f-f73f-491c-b666-95db4b91f571",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa2846f2-e09c-421d-9774-c04961a79800",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "openai = OpenAI()\n",
+ "MODEL = 'gpt-4o-mini'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7672ecdf-cf50-4b96-887a-b0a4eb5bbbf5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ " \n",
+ "menu = {\n",
+ " \"Nigiri (1 pc)\": {\n",
+ " \"Salmon\": 4.25,\n",
+ " \"Tuna\": 4.75,\n",
+ " \"Yellowtail\": 5.00,\n",
+ " \"Eel\": 5.25,\n",
+ " \"Tamago\": 3.00,\n",
+ " },\n",
+ " \"Sashimi (3 pc)\": {\n",
+ " \"Salmon\": 8.50,\n",
+ " \"Tuna\": 9.00,\n",
+ " \"Yellowtail\": 9.50,\n",
+ " \"Octopus\": 8.00,\n",
+ " },\n",
+ " \"Classic Rolls (6 pc)\": {\n",
+ " \"California\": 6.50,\n",
+ " \"Spicy Tuna\": 7.50,\n",
+ " \"Philadelphia\": 7.25,\n",
+ " \"Cucumber\": 4.50,\n",
+ " \"Avocado\": 4.75,\n",
+ " },\n",
+ " \"Specialty Rolls (8 pc)\": {\n",
+ " \"Dragon\": 13.50,\n",
+ " \"Rainbow\": 14.00,\n",
+ " \"Crunchy Shrimp\": 12.50,\n",
+ " \"Volcano\": 13.00,\n",
+ " \"Spider\": 14.50,\n",
+ " },\n",
+ " \"Appetizers\": {\n",
+ " \"Edamame\": 5.00,\n",
+ " \"Gyoza (5)\": 6.50,\n",
+ " \"Miso Soup\": 3.00,\n",
+ " \"Seaweed Salad\": 5.50,\n",
+ " },\n",
+ " \"Beverages\": {\n",
+ " \"Green Tea\": 2.50,\n",
+ " \"Ramune Soda\": 3.00,\n",
+ " \"Sparkling Water\": 2.75,\n",
+ " },\n",
+ " \"Desserts\": {\n",
+ " \"Mochi Ice Cream (2)\": 5.00,\n",
+ " \"Matcha Cheesecake\": 6.50,\n",
+ " },\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99914500-3630-4fea-987c-d19c760994c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(message, history):\n",
+ " system_message = \"You are a helpful assistant for Sakana-ya (魚屋) Sushi restaurant.\\\n",
+ " Help out with information and if you dont know something just say you cant help with that.\"\n",
+ " system_message += json.dumps(menu)\n",
+ " system_message+=\"If something is not in the menu, we dont serve it.\\\n",
+ " If we dont have a dish just mention it that we dont offer it. \"\n",
+ "\n",
+ " sushi_exotic = [\n",
+ " {\"role\": \"user\", \"content\": \"Do you have aji?\"},\n",
+ " {\"role\": \"assistant\", \"content\": \"We currently dont have shun its available only during the season i.e in May.\"},\n",
+ " {\"role\": \"user\", \"content\": \"What about buri?\"},\n",
+ " {\"role\": \"assistant\", \"content\": \"Thats seasonal as well only during December. Do visit us during that time.\"},\n",
+ " \n",
+ " ]\n",
+ " \n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}]+ sushi_exotic + history + [{\"role\": \"user\", \"content\": message}]\n",
+ " stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)\n",
+ "\n",
+ " response = \"\"\n",
+ " for chunk in stream:\n",
+ " response += chunk.choices[0].delta.content or ''\n",
+ " yield response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5c61d91-abee-4ada-9a42-ae87cf53fcff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gr.ChatInterface(fn=chat, type=\"messages\").launch()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/brochure-builder-with-gradio.ipynb b/week2/community-contributions/brochure-builder-with-gradio.ipynb
new file mode 100644
index 0000000..42f41b7
--- /dev/null
+++ b/week2/community-contributions/brochure-builder-with-gradio.ipynb
@@ -0,0 +1,456 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9905f163-759f-474b-8f7a-7d14da0df44d",
+ "metadata": {},
+ "source": [
+ "### BUSINESS CHALLENGE: Using Multi-shot Prompting\n",
+ "#### Day 5\n",
+ "\n",
+ "Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.\n",
+ "\n",
+ "We will be provided a company name and their primary website."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a0895f24-65ff-4624-8ae0-15d2d400d8f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
+ "\n",
+ "import os\n",
+ "import requests\n",
+ "import json\n",
+ "from typing import List\n",
+ "from dotenv import load_dotenv\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7794aa70-5962-4669-b86f-b53639f4f9ea",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "OpenAI API Key exists and begins sk-proj-\n",
+ "Anthropic API Key exists and begins sk-ant-\n",
+ "Google API Key exists and begins AIzaSyCf\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Initialize and constants\n",
+ "\n",
+ "# Load environment variables in a file called .env\n",
+ "# Print the key prefixes to help with any debugging\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cfb690e2-4940-4dc8-8f32-5c2dab3c19da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to OpenAI\n",
+ "\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "81022472-755e-4a87-bd5d-58babb09e94b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_model = \"gpt-4.1-mini\"\n",
+ "claude_model = \"claude-3-5-haiku-latest\"\n",
+ "gemini_model = \"gemini-2.5-flash\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "63bf8631-2746-4255-bec1-522855d3e812",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A class to represent a Webpage\n",
+ "\n",
+ "# Some websites need you to use proper headers when fetching them:\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n",
+ "\n",
+ "class Website:\n",
+ " \"\"\"\n",
+ " A utility class to represent a Website that we have scraped, now with links\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, url):\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " self.body = response.content\n",
+ " soup = BeautifulSoup(self.body, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " if soup.body:\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
+ " else:\n",
+ " self.text = \"\"\n",
+ " links = [link.get('href') for link in soup.find_all('a')]\n",
+ " self.links = [link for link in links if link]\n",
+ "\n",
+ " def get_contents(self):\n",
+ " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e7bb527-e769-4245-bb91-ae65e64593ff",
+ "metadata": {},
+ "source": [
+ "## First step: Have LLM figure out which links are relevant\n",
+ "\n",
+ "### Use a call to the LLM to read the links on a webpage, and respond in structured JSON. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "1ce303ae-b967-4261-aadc-02dafa54db4a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
+ "You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
+ "such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
+ "link_system_prompt += \"You should respond in JSON as in this example:\"\n",
+ "link_system_prompt += \"\"\"\n",
+ "{\n",
+ " \"links\": [\n",
+ " {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
+ " {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
+ " ]\n",
+ "}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "d24a4c0c-a1d1-4897-b2a7-4128d25c2e08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_links_user_prompt(website):\n",
+ " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
+ " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
+ "Do not include Terms of Service, Privacy, email links.\\n\"\n",
+ " user_prompt += \"Links (some might be relative links):\\n\"\n",
+ " user_prompt += \"\\n\".join(website.links)\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8103fc11-5bc0-41c4-8c97-502c9e96429c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_links(url, model): # 1st inference\n",
+ " website = Website(url)\n",
+ " response = openai.chat.completions.create(\n",
+ " model=model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": link_system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
+ " ],\n",
+ " response_format={\"type\": \"json_object\"}\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return json.loads(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "dc84a695-515d-4292-9a95-818f4fe3d20e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "huggingface = Website(\"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "91896908-1632-41fc-9b8b-39a7638d8dd1",
+ "metadata": {},
+ "source": [
+ "## Second step: make the brochure!\n",
+ "\n",
+ "Assemble all the details into another prompt to GPT4-o"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "ab7c54e3-e654-4b1f-8671-09194b628aa0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_all_details(url, model): # 1st inference wrapper\n",
+ " result = \"Landing page:\\n\"\n",
+ " result += Website(url).get_contents()\n",
+ " links = get_links(url, model) # inference\n",
+ " # print(\"Found links:\", links)\n",
+ " for link in links[\"links\"]:\n",
+ " result += f\"\\n\\n{link['type']}\\n\"\n",
+ " result += Website(link[\"url\"]).get_contents()\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ea9f54d1-a248-4c56-a1de-6633193de5bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
+ "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
+ "Include details of company culture, customers and careers/jobs if you have the information.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "13412c85-badd-4d79-a5ac-8283e4bb832f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_brochure_user_prompt(company_name, url, model):\n",
+ " user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
+ " user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company.\\n\"\n",
+ " user_prompt += get_all_details(url, model) # inference wrapper\n",
+ " user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
+ " return user_prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "107a2100-3f7d-4f16-8ba7-b5da602393c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_gpt(company_name, url):\n",
+ " stream = openai.chat.completions.create(\n",
+ " model=gpt_model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url, gpt_model)}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " result = \"\"\n",
+ " for chunk in stream:\n",
+ " result += chunk.choices[0].delta.content or \"\"\n",
+ " yield result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "eaf61e44-537a-41ff-a82c-9525df8abc83",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "claude_via_openai_client = OpenAI(\n",
+ " api_key=anthropic_api_key,\n",
+ " base_url=\"https://api.anthropic.com/v1\" \n",
+ ")\n",
+ "\n",
+ "def stream_claude(company_name, url):\n",
+ " result = claude_via_openai_client.chat.completions.create(\n",
+ " model=claude_model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url, claude_model)}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " response = \"\"\n",
+ " with result as stream:\n",
+ " for text in stream.text_stream:\n",
+ " response += text or \"\"\n",
+ " yield response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "93e75fca-e54e-4637-86f1-4acc04b04d65",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gemini_via_openai_client = OpenAI(\n",
+ " api_key=google_api_key, \n",
+ " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
+ ")\n",
+ "\n",
+ "def stream_gemini(company_name, url):\n",
+ " result = gemini_via_openai_client.chat.completions.create(\n",
+ " model=gemini_model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url, gemini_model)}\n",
+ " ],\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " response = \"\"\n",
+ " with result as stream:\n",
+ " for text in stream.text_stream:\n",
+ " response += text or \"\"\n",
+ " yield response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "26cbe9b5-3603-49a1-a676-75c7ddaacdb8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# stream_gpt(\"HuggingFace\", \"https://huggingface.co\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "f19be4c0-71a1-427e-b3dc-e1896e2c078b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def stream_model(company_name, url, model):\n",
+ " yield \"\"\n",
+ " if model==\"GPT\":\n",
+ " result = stream_gpt(company_name, url)\n",
+ " elif model==\"Claude\":\n",
+ " result = stream_claude(company_name, url)\n",
+ " elif model==\"Gemini\":\n",
+ " result = stream_gemini(company_name, url)\n",
+ " else:\n",
+ " raise ValueError(\"Unknown model\")\n",
+ " yield from result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "ab510f66-b25c-4c25-92d0-e3c735b8b5fa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7871\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "view = gr.Interface(\n",
+ " fn=stream_model,\n",
+ " inputs=[gr.Textbox(label=\"Company\"), gr.Textbox(label=\"URL\"), gr.Dropdown([\"GPT\", \n",
+ " # \"Claude\", #TODO\n",
+ " # \"Gemini\"\n",
+ " ], label=\"Select model\", value=\"GPT\")],\n",
+ " outputs=[gr.Markdown(label=\"Response:\")],\n",
+ " flagging_mode=\"never\"\n",
+ ")\n",
+ "view.launch()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/day1_3_way_conversation_levzhitnik.ipynb b/week2/community-contributions/day1_3_way_conversation_levzhitnik.ipynb
new file mode 100644
index 0000000..e5c0388
--- /dev/null
+++ b/week2/community-contributions/day1_3_way_conversation_levzhitnik.ipynb
@@ -0,0 +1,255 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "596b764a-2ece-4cb0-91c7-5317b8b2c65f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "from IPython.display import Markdown, display, update_display"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "191079a8-fcb0-45fa-a954-9e92e3baa250",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a0f19ff-c936-469f-9fa1-c09b5c126263",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_model = \"gpt-4.1-mini\"\n",
+ "claude_model = \"claude-3-5-haiku-latest\"\n",
+ "gemini_model = \"gemini-2.5-flash\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c1ffa25e-8250-4a86-951a-af44f1369336",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_client = OpenAI(\n",
+ " api_key=openai_api_key\n",
+ ")\n",
+ "\n",
+ "claude_client = OpenAI(\n",
+ " api_key=anthropic_api_key,\n",
+ " base_url=\"https://api.anthropic.com/v1/\"\n",
+ ")\n",
+ "\n",
+ "gemini_client = OpenAI(\n",
+ " api_key=google_api_key,\n",
+ " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eb8a203d-bdc7-40ee-a456-d47bdc71b07f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tests\n",
+ "\n",
+ "messages = [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Howdy partner!\"}]\n",
+ "\n",
+ "gpt_response = gpt_client.chat.completions.create(\n",
+ " model=gpt_model,\n",
+ " messages=messages,\n",
+ " temperature=0.5\n",
+ ")\n",
+ "print(f\"GPT: {gpt_response.choices[0].message.content}\")\n",
+ "\n",
+ "claude_response = claude_client.chat.completions.create(\n",
+ " model=claude_model,\n",
+ " messages=messages,\n",
+ " temperature=0.5\n",
+ ")\n",
+ "print(f\"Claude: {claude_response.choices[0].message.content}\")\n",
+ "\n",
+ "gemini_response = gemini_client.chat.completions.create(\n",
+ " model=gemini_model,\n",
+ " messages=messages,\n",
+ " temperature=0.5\n",
+ ")\n",
+ "print(f\"Gemini: {gemini_response.choices[0].message.content}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d140561e-fbf8-4741-b0bd-f850524bd6b3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_system = \"You are 'user_1'. You are snarky, entitled, and argumentative. Your role is to try and argue about anything and everything, and always have the last word, and never back down.\"\n",
+ "claude_system = \"You are 'user_2'. You are a sharp debater. You always debate every argument, and you do everything you can to be the debate winner. You don't stop until you have the upper hand.\"\n",
+ "gemini_system = \"You are 'user_3'. You are a mediator, coach and philosopher. Your job is to bring two sides to an agreement and have them stop arguing.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2b26a34-eb36-41c1-be2d-fc8154218897",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "apis = {\n",
+ " \"gpt\": {\n",
+ " \"name\": \"gpt\",\n",
+ " \"user_name\": \"Gapetto\",\n",
+ " \"client\": gpt_client,\n",
+ " \"model\": gpt_model,\n",
+ " \"system\": gpt_system,\n",
+ " \"messages\": [],\n",
+ " },\n",
+ " \"claude\": {\n",
+ " \"name\": \"claude\",\n",
+ " \"user_name\": \"Claudia\",\n",
+ " \"client\": claude_client,\n",
+ " \"model\": claude_model,\n",
+ " \"system\": claude_system,\n",
+ " \"messages\": [],\n",
+ " },\n",
+ " \"gemini\": {\n",
+ " \"name\": \"gemini\",\n",
+ " \"user_name\": \"Germione\",\n",
+ " \"client\": gemini_client,\n",
+ " \"model\": gemini_model,\n",
+ " \"system\": gemini_system,\n",
+ " \"messages\": []\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88bb7277-45dc-41b4-827c-b2e5a8b76675",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def call_llm(name: str):\n",
+ " llm = apis[name]\n",
+ " context = [{\"role\": \"system\", \"content\": llm[\"system\"]}]\n",
+ " \n",
+ " gpt_role, gpt_name = (\"assistant\", \"\") if name == \"gpt\" else (\"user\", f'{apis[\"gpt\"][\"user_name\"]}: ')\n",
+ " claude_role, claude_name = (\"assistant\", \"\") if name == \"claude\" else (\"user\", f'{apis[\"claude\"][\"user_name\"]}: ')\n",
+ " gemini_role, gemini_name = (\"assistant\", \"\") if name == \"gemini\" else (\"user\", f'{apis[\"gemini\"][\"user_name\"]}: ')\n",
+ " \n",
+ " for gpt, claude, gemini in zip(apis[\"gpt\"][\"messages\"], apis[\"claude\"][\"messages\"], apis[\"gemini\"][\"messages\"]):\n",
+ " context.append({\"role\": gpt_role, \"content\": f\"{gpt_name}{gpt}\"})\n",
+ " context.append({\"role\": claude_role, \"content\": f\"{claude_name}{claude}\"})\n",
+ " context.append({\"role\": gemini_role, \"content\": f\"{gemini_name}{gemini}\"})\n",
+ " \n",
+ " for i, key in enumerate(apis.keys()):\n",
+ " if key != name:\n",
+ " if len(apis[key][\"messages\"]) > len(llm[\"messages\"]):\n",
+ " context.append({\"role\": \"user\", \"content\": f'{apis[key][\"user_name\"]}: {apis[key][\"messages\"][-1]}'})\n",
+ " \n",
+ " response = llm[\"client\"].chat.completions.create(\n",
+ " model=llm[\"model\"],\n",
+ " messages=context,\n",
+ " temperature=0.7\n",
+ " )\n",
+ "\n",
+ " message = response.choices[0].message.content\n",
+ " llm[\"messages\"].append(message)\n",
+ " return message"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4fc73a2e-d8de-4a39-bfa2-67b16c231869",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "apis[\"gpt\"][\"messages\"] = [\"Hi\"]\n",
+ "apis[\"claude\"][\"messages\"] = [\"Hi\"]\n",
+ "apis[\"gemini\"][\"messages\"] = [\"Lord of the Rings or Harry Potter?\"] # Obviously LOTR."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3810fbaf-94d1-4750-8e13-812d2e05b2d7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_response = call_llm(\"gpt\")\n",
+ "display(Markdown(f\"### Gapetto:\\n{gpt_response}\\n\\n\"))\n",
+ "\n",
+ "claude_response = call_llm(\"claude\")\n",
+ "display(Markdown(f\"### Claudia:\\n{claude_response}\\n\\n\"))\n",
+ "\n",
+ "gemini_response = call_llm(\"gemini\")\n",
+ "display(Markdown(f\"### Germione:\\n{gemini_response}\\n\\n\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e87b2ffc-6197-401a-97ca-7f51ac1677f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "apis[\"gpt\"][\"messages\"] = [\"Hi\"]\n",
+ "apis[\"claude\"][\"messages\"] = [\"Hi\"]\n",
+ "apis[\"gemini\"][\"messages\"] = [\"Lord of the Rings or Harry Potter?\"]\n",
+ "\n",
+ "for i in range(5):\n",
+ " display(Markdown(f\"## Round {i+1}:\\n\\n\"))\n",
+ " \n",
+ " gpt_response = call_llm(\"gpt\")\n",
+ " display(Markdown(f\"### Gapetto:\\n{gpt_response}\\n\\n\"))\n",
+ "\n",
+ " claude_response = call_llm(\"claude\")\n",
+ " display(Markdown(f\"### Claudia:\\n{claude_response}\\n\\n\"))\n",
+ "\n",
+ " gemini_response = call_llm(\"gemini\")\n",
+ " display(Markdown(f\"### Germione:\\n{gemini_response}\\n\\n\"))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/day5_stock-assistant-with-tools.ipynb b/week2/community-contributions/day5_stock-assistant-with-tools.ipynb
new file mode 100644
index 0000000..1e129aa
--- /dev/null
+++ b/week2/community-contributions/day5_stock-assistant-with-tools.ipynb
@@ -0,0 +1,968 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bcb31876-4d8c-41ef-aa24-b8c78dfd5808",
+ "metadata": {},
+ "source": [
+ "# Project - Stock Information AI Assistant\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7bd1bd7-19d9-4c4b-bc4b-9bc9cca8bd0f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install finnhub-python"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import json\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr\n",
+ "import finnhub\n",
+ "from typing import Dict, List, Any, Optional\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba0ddc1a-c775-4ed3-9531-ed0c5799e87f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "\n",
+ "# Configure root logger\n",
+ "logging.basicConfig(\n",
+ " level=logging.INFO, # Set level: DEBUG, INFO, WARNING, ERROR\n",
+ " format=\"%(asctime)s [%(levelname)s] %(message)s\", \n",
+ " force=True # Ensures reconfiguration if you rerun this cell\n",
+ ")\n",
+ "\n",
+ "logger = logging.getLogger(__name__) # Use a global logger object\n",
+ "logger.info(\"Logger initialized!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialization\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "FINNHUB_API_KEY = os.getenv(\"FINNHUB_API_KEY\")\n",
+ "\n",
+ "if openai_api_key:\n",
+ " logger.info(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " logger.error(\"OpenAI API Key not set\")\n",
+ "\n",
+ "if FINNHUB_API_KEY:\n",
+ " logger.info(f\"FINNHUB_API_KEY exists!\")\n",
+ "else:\n",
+ " logger.error(\"OpenAI API Key not set\")\n",
+ " \n",
+ "MODEL = \"gpt-4.1-mini\" # not using gpt-5-mini as openai doesn't let you stream responses till you are a verified organisation :(\n",
+ "openai = OpenAI()\n",
+ "finnhub_client = finnhub.Client(api_key=FINNHUB_API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee3aaa9a-5495-42fd-a382-803fbfa92eaf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = f\"\"\"\n",
+ "You are \"TickerBot\" โ a concise, factual, educational assistant specializing in U.S. stocks. \n",
+ "Your job: quickly and accurately explain stock and company information in plain English. NEVER give investment advice, buy/sell recommendations, or price predictions.\n",
+ "\n",
+ "## PRIVACY ABOUT IMPLEMENTATION\n",
+ "- Do not reveal any internal implementation details to users. Never display or mention internal tool names, API names, developer notes, configured flags, date-range limits, or other system/developer constraints in user-facing replies.\n",
+ "- All runtime/tool constraints and capability detection are internal. Present only user-facing capabilities in plain language.\n",
+ "\n",
+ "## USER-FACING CAPABILITIES\n",
+ "- When asked \"What can you do?\", list only stock-relevant actions in plain language. Example reply:\n",
+ " \"I can look up tickers, show the latest quotes, provide key company financials and latest earnings details, summarize recent company or market headlines, and give a brief market overview.\"\n",
+ "- Do not list internal utilities or developer tools as user-facing capabilities.\n",
+ "\n",
+ "## GENERAL PRINCIPLES\n",
+ "- Answer only what was asked for. \n",
+ "- Be brief, clear, and professional while still maintaining a warm tone. Use short paragraphs and one-line bullet explanations when requested.\n",
+ "- Return only what the system provides; do not invent, infer, or extrapolate unavailable data.\n",
+ "- Never offer or advertise any feature the environment does not actually support. Avoid offering attachments, direct downloads, or full-text article retrieval unless the system explicitly provides those outputs.\n",
+ "\n",
+ "## Behavior Rules\n",
+ "- Stay professional and neutral at all times. \n",
+ "- Clarify only when user intent is ambiguous; never guess. \n",
+ "- Only disclose information the user explicitly requested. \n",
+ "- Never explain system limits (e.g., API ranges, date limits) ever. \n",
+ "- Summaries should be tight and relevant, not verbose. \n",
+ "\n",
+ "## NEWS & HEADLINES\n",
+ "- When interpreting date-related or temporal reasoning requests (e.g., โlatest earnings,โ โrecent news,โ โQ1 resultsโ) Call `get_current_time` to determine the current date.\n",
+ "- Present news/headlines in concise bullet lines when requested. Default recent-window behavior is internal; do not describe or expose internal default windows or limits to the user.\n",
+ "- If the system only returns headlines/summaries, present those and do not offer to fetch full-text or additional ranges unless the user explicitly asks and the environment supports that action.\n",
+ "\n",
+ "## FOLLOW-UP & CLARIFYING QUESTIONS\n",
+ "- If no matching stock symbol is found, ask the user to clarify the name or ticker. Mention you only support U.S. stocks. If they confirm the symbol but no data exists, state that no results were found.\n",
+ "- Never append unsolicited menus, multi-choice lists, or repeated \"Would you like...\" prompts at the end of a normal reply.\n",
+ "- Ask a single direct clarifying question only when strictly necessary to fulfill the user's request (for example: ambiguous company name or missing ticker). That single question must be the final line of the reply.\n",
+ "- If the user's intent is clear, proceed and return results. Do not request confirmations or offer options unless required to complete the task.\n",
+ "\n",
+ "## MISSING-DATA / NOTE RULES\n",
+ "- Do NOT call out missing/unavailable single fields unless:\n",
+ " 1) the missing field was explicitly requested by the user; OR\n",
+ " 2) multiple (>1) key metrics required to answer the user's request are unavailable and their absence materially prevents a useful answer.\n",
+ "- If condition (1) or (2) applies, include at most one concise \"Note:\" line naming the specific missing field(s) and nothing else.\n",
+ "- Otherwise omit any comment about single, non-central missing fields.\n",
+ "\n",
+ "## ERROR HANDLING\n",
+ "- If a company/ticker cannot be found: \"I couldn't find that company on U.S. exchanges. Please provide the ticker or a clearer company name.\"\n",
+ "- If partial results are returned: present available data and at most one short note: \"Some data wasn't available: , .\"\n",
+ "- If you do not know the answer, say plainly: \"I don't have that information.\"\n",
+ "\n",
+ "## ADVICE / DISCLAIMER (CONDITIONAL)\n",
+ "- Do NOT include an investment-advice disclaimer in normal greetings or factual replies.\n",
+ "- Show a single-line disclaimer only when the user explicitly asks for recommendations/advice or uses clear advice-seeking language (e.g., \"should I buy\", \"recommend\", \"what should I do\", \"financial advice\").\n",
+ "- Disclaimer text when triggered: \"I can't provide investment advice or buy/sell recommendations. I can provide facts and explain metrics.\"\n",
+ "\n",
+ "EXAMPLES (templates โ follow exactly)\n",
+ "\n",
+ "1) Clear factual request (no unsolicited follow-up)\n",
+ "User: \"What's AAPL last trade?\"\n",
+ "Assistant:\n",
+ "Last trade timestamp: 2025-08-28 20:00:00 UTC (U.S. market close โ 4:00 PM ET)\n",
+ "\n",
+ "(โend: no menu, no offersโ)\n",
+ "\n",
+ "2) Ambiguous request โ single clarifying question only\n",
+ "User: \"Show me Apple\"\n",
+ "Assistant:\n",
+ "I found two matches. Do you mean Apple Inc. (AAPL) or Apple Hospitality REIT (APLE)?\n",
+ "\n",
+ "(โend: single question final lineโ)\n",
+ "\n",
+ "3) News returned as headlines only (do not offer unsupported fetches)\n",
+ "User: \"Recent NVDA news\"\n",
+ "Assistant:\n",
+ "- 2025-08-28: Headline A โ short summary.\n",
+ "- 2025-08-26: Headline B โ short summary.\n",
+ "(โend: do not offer full-article fetch or extended-range offers unless explicitly requested and supportedโ)\n",
+ "\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fdf1a2b0-07be-47a0-9ce3-14d21b48c8f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_current_time() -> Dict[str, Any]:\n",
+ " \"\"\"\n",
+ " Retrieve the current UTC time in ISO format with timezone.\n",
+ " Returns a dictionary for consistency with other tools.\n",
+ " \"\"\"\n",
+ " try:\n",
+ " current_time = datetime.utcnow().isoformat() + 'Z'\n",
+ " return {\n",
+ " \"success\": True,\n",
+ " \"current_time\": current_time\n",
+ " }\n",
+ " except Exception as e:\n",
+ " return {\"success\": False, \"error\": f\"Failed to get time: {str(e)[:100]}\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "12d912fc-91fb-469e-9572-2876a099f5aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_current_time_function = {\n",
+ " \"name\": \"get_current_time\",\n",
+ " \"description\": \"Get the current UTC time in ISO format (YYYY-MM-DDTHH:MM:SS.ssssssZ). Useful for temporal reasoning, date calculations, or setting time ranges for queries like news.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {}, # No parameters needed\n",
+ " \"required\": []\n",
+ " }\n",
+ "}\n",
+ "get_current_time_tool = {\"type\": \"function\", \"function\": get_current_time_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61a2a15d-b559-4844-b377-6bd5cb4949f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def validate_symbol(symbol: str) -> bool:\n",
+ " \"\"\"Validate stock symbol format\"\"\"\n",
+ " if not symbol or not isinstance(symbol, str):\n",
+ " return False\n",
+ " return symbol.isalnum() and 1 <= len(symbol) <= 5 and symbol.isupper()\n",
+ "\n",
+ "def search_symbol(query: str) -> Dict[str, Any]:\n",
+ " \"\"\"Search for stock symbol using Finnhub client\"\"\"\n",
+ " logger.info(f\"Tool search_symbol called for {query}\")\n",
+ " try:\n",
+ " if not query or len(query.strip()) < 1:\n",
+ " return {\"success\": False, \"error\": \"Invalid search query\"}\n",
+ " \n",
+ " query = query.strip()[:50]\n",
+ " result = finnhub_client.symbol_lookup(query)\n",
+ " logger.info(f\"Tool search_symbol {result}\")\n",
+ " \n",
+ " if result.get(\"result\") and len(result[\"result\"]) > 0:\n",
+ " first_result = result[\"result\"][0]\n",
+ " symbol = first_result.get(\"symbol\", \"\").upper()\n",
+ " \n",
+ " if validate_symbol(symbol):\n",
+ " return {\n",
+ " \"success\": True,\n",
+ " \"symbol\": symbol\n",
+ " }\n",
+ " else:\n",
+ " return {\"success\": False, \"error\": \"Invalid symbol format found\"}\n",
+ " else:\n",
+ " return {\"success\": False, \"error\": \"No matching US stocks found\"}\n",
+ " \n",
+ " except Exception as e:\n",
+ " return {\"success\": False, \"error\": f\"Symbol search failed: {str(e)[:100]}\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "173010e3-dfef-4611-8b68-d11256bd5fba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "search_symbol_function = {\n",
+ " \"name\": \"search_symbol\",\n",
+ " \"description\": \"Search for a stock symbol / ticker symbol based on company name or partial name\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"query\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Company name or partial name to search for, extract only relevant name part and pass it here, keep this to less than 50 characters\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\n",
+ " \"query\"\n",
+ " ]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "search_symbol_tool = {\"type\": \"function\", \"function\": search_symbol_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "448bb4ce-8e86-4ceb-ab52-96bddfd33337",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _format_big_number_from_millions(value_millions: Any) -> str:\n",
+ " \"\"\"\n",
+ " Finnhub returns some large metrics (marketCapitalization, enterpriseValue, revenueTTM)\n",
+ " in MILLIONS USD. Convert to full USD and format with M/B/T suffixes.\n",
+ " \"\"\"\n",
+ " if value_millions is None:\n",
+ " return \"Unavailable\"\n",
+ " try:\n",
+ " value = float(value_millions) * 1_000_000 # convert millions -> full USD\n",
+ " except (TypeError, ValueError):\n",
+ " return \"Unavailable\"\n",
+ "\n",
+ " trillion = 1_000_000_000_000\n",
+ " billion = 1_000_000_000\n",
+ " million = 1_000_000\n",
+ "\n",
+ " if value >= trillion:\n",
+ " return f\"{value / trillion:.2f}T USD\"\n",
+ " if value >= billion:\n",
+ " return f\"{value / billion:.2f}B USD\"\n",
+ " if value >= million:\n",
+ " return f\"{value / million:.2f}M USD\"\n",
+ " return f\"{value:.2f} USD\"\n",
+ "\n",
+ "\n",
+ "def _safe_metric(metrics: Dict[str, Any], key: str) -> Any:\n",
+ " \"\"\"\n",
+ " Return metric value if present; otherwise \"Unavailable\".\n",
+ " We intentionally return the raw value for numeric metrics (no rounding/format)\n",
+ " except for the specially formatted big-number fields handled elsewhere.\n",
+ " \"\"\"\n",
+ " if metrics is None:\n",
+ " return \"Unavailable\"\n",
+ " val = metrics.get(key)\n",
+ " return val if val is not None else \"Unavailable\"\n",
+ "\n",
+ "\n",
+ "def get_company_financials(symbol: str) -> Dict[str, Any]:\n",
+ " \"\"\"\n",
+ " Fetch and return a curated set of 'basic' financial metrics for `symbol`.\n",
+ " - Calls finnhub_client.company_basic_financials(symbol, 'all')\n",
+ " - Formats market cap, enterprise value, revenue (Finnhub returns these in millions)\n",
+ " - Returns success flag and readable keys\n",
+ " \"\"\"\n",
+ " logger.info(f\"Tool get_company_financials called for {symbol}\")\n",
+ " try:\n",
+ " if not symbol or not symbol.strip():\n",
+ " return {\"success\": False, \"error\": \"Invalid stock symbol\"}\n",
+ "\n",
+ " symbol = symbol.strip().upper()\n",
+ "\n",
+ " # --- API Call ---\n",
+ " financials_resp = finnhub_client.company_basic_financials(symbol, \"all\")\n",
+ "\n",
+ " # Finnhub places primary values under \"metric\"\n",
+ " metrics = financials_resp.get(\"metric\", {})\n",
+ " if not metrics:\n",
+ " return {\"success\": False, \"error\": \"No financial metrics found\"}\n",
+ "\n",
+ " # --- Build result using helpers ---\n",
+ " result = {\n",
+ " \"success\": True,\n",
+ " \"symbol\": symbol,\n",
+ " \"financials\": {\n",
+ " \"Market Cap\": _format_big_number_from_millions(metrics.get(\"marketCapitalization\")),\n",
+ " \"Enterprise Value\": _format_big_number_from_millions(metrics.get(\"enterpriseValue\")),\n",
+ " \"P/E Ratio (TTM)\": _safe_metric(metrics, \"peBasicExclExtraTTM\"),\n",
+ " \"Forward P/E\": _safe_metric(metrics, \"forwardPE\"),\n",
+ " \"Gross Margin (TTM)\": _safe_metric(metrics, \"grossMarginTTM\"),\n",
+ " \"Net Profit Margin (TTM)\": _safe_metric(metrics, \"netProfitMarginTTM\"),\n",
+ " \"EPS (TTM)\": _safe_metric(metrics, \"epsTTM\"),\n",
+ " \"EPS Growth (5Y)\": _safe_metric(metrics, \"epsGrowth5Y\"),\n",
+ " \"Dividend Yield (Indicated Annual)\": _safe_metric(metrics, \"dividendYieldIndicatedAnnual\"),\n",
+ " \"Current Ratio (Quarterly)\": _safe_metric(metrics, \"currentRatioQuarterly\"),\n",
+ " \"Debt/Equity (Long Term, Quarterly)\": _safe_metric(metrics, \"longTermDebt/equityQuarterly\"),\n",
+ " \"Beta\": _safe_metric(metrics, \"beta\"),\n",
+ " \"52-Week High\": _safe_metric(metrics, \"52WeekHigh\"),\n",
+ " \"52-Week Low\": _safe_metric(metrics, \"52WeekLow\"),\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " return result\n",
+ "\n",
+ " except Exception as e:\n",
+ " # keep error message short but useful for debugging\n",
+ " return {\"success\": False, \"error\": f\"Failed to fetch metrics: {str(e)[:200]}\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9df7b74e-fec8-4e75-92a9-31acc75e6e97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_company_financials_function = {\n",
+ " \"name\": \"get_company_financials\",\n",
+ " \"description\": \"Fetch and return a curated set of basic financial metrics for a stock symbol. Calls Finnhub's company_basic_financials API, formats large numbers (market cap, enterprise value, revenue) in M/B/T USD, and shows metrics like P/E ratios, EPS, margins, dividend yield, debt/equity, beta, and 52-week range. Returns 'Unavailable' for missing values.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"symbol\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Stock ticker symbol to fetch metrics for. Example: 'AAPL' for Apple Inc.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\n",
+ " \"symbol\"\n",
+ " ]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "\n",
+ "get_company_financials_tool = {\"type\": \"function\", \"function\": get_company_financials_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfeeb200-3f30-4855-82b9-cc8b2a950f80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_stock_quote(symbol: str) -> dict:\n",
+ " \"\"\"\n",
+ " Fetch the latest stock quote for a given ticker symbol using Finnhub's /quote endpoint.\n",
+ " Returns current price, daily high/low, open, previous close, percent change, and readable timestamp.\n",
+ " \"\"\"\n",
+ " logger.info(f\"Tool get_stock_quote called for {symbol}\")\n",
+ " try:\n",
+ " if not symbol or len(symbol.strip()) < 1:\n",
+ " return {\"success\": False, \"error\": \"Invalid symbol provided\"}\n",
+ " \n",
+ " symbol = symbol.strip().upper()\n",
+ " data = finnhub_client.quote(symbol)\n",
+ "\n",
+ " if not data or \"c\" not in data:\n",
+ " return {\"success\": False, \"error\": \"No quote data found\"}\n",
+ " \n",
+ " # Convert epoch timestamp to ISO UTC if present\n",
+ " timestamp = data.get(\"t\")\n",
+ " if timestamp and isinstance(timestamp, (int, float)):\n",
+ " timestamp = datetime.utcfromtimestamp(timestamp).isoformat() + \"Z\"\n",
+ " else:\n",
+ " timestamp = \"Unavailable\"\n",
+ " \n",
+ " return {\n",
+ " \"success\": True,\n",
+ " \"symbol\": symbol,\n",
+ " \"current_price\": round(data.get(\"c\", 0), 2) if data.get(\"c\") is not None else \"Unavailable\",\n",
+ " \"change\": round(data.get(\"d\", 0), 2) if data.get(\"d\") is not None else \"Unavailable\",\n",
+ " \"percent_change\": f\"{round(data.get('dp', 0), 2)}%\" if data.get(\"dp\") is not None else \"Unavailable\",\n",
+ " \"high_price\": round(data.get(\"h\", 0), 2) if data.get(\"h\") is not None else \"Unavailable\",\n",
+ " \"low_price\": round(data.get(\"l\", 0), 2) if data.get(\"l\") is not None else \"Unavailable\",\n",
+ " \"open_price\": round(data.get(\"o\", 0), 2) if data.get(\"o\") is not None else \"Unavailable\",\n",
+ " \"previous_close\": round(data.get(\"pc\", 0), 2) if data.get(\"pc\") is not None else \"Unavailable\",\n",
+ " \"timestamp\": timestamp\n",
+ " }\n",
+ " except Exception as e:\n",
+ " return {\"success\": False, \"error\": f\"Quote retrieval failed: {str(e)[:100]}\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3724d92a-4515-4267-af6f-2c1ec2b6ed36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_stock_quote_function = {\n",
+ " \"name\": \"get_stock_quote\",\n",
+ " \"description\": \"Retrieve the latest stock quote for a given symbol, including current price, daily high/low, open, previous close, and percent change. Data is near real-time. Avoid constant polling; use websockets for streaming updates.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"symbol\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Stock ticker symbol to fetch the latest quote for. Example: 'AAPL', 'MSFT'.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"symbol\"]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "get_stock_quote_tool = {\"type\": \"function\", \"function\": get_stock_quote_function}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62f5d477-6626-428f-b8eb-d763e736ef5b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_company_news(symbol: str, _from: str, to: str):\n",
+ " \"\"\"\n",
+ " Fetch the top latest company news for a stock symbol within a date range.\n",
+ " - Ensures the range does not exceed ~1 months (35 days).\n",
+ " - Best practice: Keep searches to a month or less to avoid too much data.\n",
+ "\n",
+ " Args:\n",
+ " symbol (str): Stock ticker (e.g., \"AAPL\").\n",
+ " _from (str): Start date in YYYY-MM-DD format.\n",
+ " to (str): End date in YYYY-MM-DD format.\n",
+ "\n",
+ " Returns:\n",
+ " list or dict: Cleaned news data or error message.\n",
+ " \"\"\"\n",
+ " # Validate date format\n",
+ " logger.info(f\"Tool get_company_news called for {symbol} from {_from} to {to}\")\n",
+ " try:\n",
+ " start_date = datetime.strptime(_from, \"%Y-%m-%d\")\n",
+ " end_date = datetime.strptime(to, \"%Y-%m-%d\")\n",
+ " except ValueError:\n",
+ " return {\"success\": False, \"error\": \"Invalid date format. Use YYYY-MM-DD.\"}\n",
+ "\n",
+ " # Check date range\n",
+ " delta_days = (end_date - start_date).days\n",
+ " if delta_days > 35:\n",
+ " return {\n",
+ " \"success\": False, \n",
+ " \"error\": f\"Date range too large ({delta_days} days). \"\n",
+ " \"Please use a range of 1 months or less.\"\n",
+ " }\n",
+ "\n",
+ " # Fetch data\n",
+ " try:\n",
+ " news = finnhub_client.company_news(symbol, _from=_from, to=to)\n",
+ " except Exception as e:\n",
+ " return {\"success\": False, \"error\": str(e)}\n",
+ "\n",
+ " # Do not want to report just the latest news in the time period\n",
+ " if len(news) <= 10:\n",
+ " # If 10 or fewer articles, take all\n",
+ " selected_news = news\n",
+ " else:\n",
+ " # Take first 5 (oldest) and last 5 (newest)\n",
+ " selected_news = news[:5] + news[-5:]\n",
+ "\n",
+ " # Clean & transform objects\n",
+ " cleaned_news = []\n",
+ " for article in selected_news:\n",
+ " cleaned_news.append({\n",
+ " \"summary\": article.get(\"summary\"),\n",
+ " \"source\": article.get(\"source\"),\n",
+ " \"published_at\": datetime.utcfromtimestamp(article[\"datetime\"]).strftime(\"%Y-%m-%d %H:%M:%S UTC\"),\n",
+ " \"related\": article.get(\"related\")\n",
+ " })\n",
+ "\n",
+ " return {\"success\": True, \"news\": cleaned_news}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5150ecb6-e3f1-46dc-94fa-2a9abe5165f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_company_news_function = {\n",
+ " \"name\": \"get_company_news\",\n",
+ " \"description\": \"Fetch the top most recent company news articles for a given stock symbol. โ ๏ธ Avoid querying more than a 1-month range at a time as it may return too much data. Only tells news about company within last 1 year. An error is returned if the requested time range exceeds 1 month.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"symbol\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Stock ticker symbol, e.g., 'AAPL'.\"\n",
+ " },\n",
+ " \"_from\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Start date in YYYY-MM-DD format. Ensure it is not more than 1 year ago from today. Ensure it is before or equal to the date in to.\"\n",
+ " },\n",
+ " \"to\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"End date in YYYY-MM-DD format. Ensure it is not more than 1 year ago. Ensure it is after or equal to the date in from.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\n",
+ " \"symbol\",\n",
+ " \"_from\",\n",
+ " \"to\"\n",
+ " ]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "get_company_news_tool = {\"type\": \"function\", \"function\": get_company_news_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "26dd7375-626f-4235-b4a2-f1926f62cc5e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_market_news(category: str = \"general\"):\n",
+ " \"\"\"\n",
+ " Fetch the latest market news for a given category.\n",
+ "\n",
+ " Args:\n",
+ " category (str): News category. One of [\"general\", \"forex\", \"crypto\", \"merger\"].\n",
+ "\n",
+ " Returns:\n",
+ " list or dict: A cleaned list of news articles or error message.\n",
+ " \"\"\"\n",
+ " logger.info(f\"Tool get_market_news called for category '{category}'\")\n",
+ "\n",
+ " try:\n",
+ " news = finnhub_client.general_news(category)\n",
+ " except Exception as e:\n",
+ " logger.error(f\"Tool get_market_news API call failed!\")\n",
+ " return {\"success\": False, \"error\": str(e)}\n",
+ "\n",
+ " # Do not want to report just the latest news in the time period\n",
+ " if len(news) <= 10:\n",
+ " # If 10 or fewer articles, take all\n",
+ " selected_news = news\n",
+ " else:\n",
+ " # Take first 5 (oldest) and last 5 (newest)\n",
+ " selected_news = news[:5] + news[-5:]\n",
+ "\n",
+ " # Clean & transform objects\n",
+ " cleaned_news = []\n",
+ " for article in selected_news:\n",
+ " cleaned_news.append({\n",
+ " \"headline\": article.get(\"headline\"),\n",
+ " \"summary\": article.get(\"summary\"),\n",
+ " \"source\": article.get(\"source\"),\n",
+ " \"category\": article.get(\"category\"),\n",
+ " \"related\": article.get(\"related\")\n",
+ " })\n",
+ "\n",
+ " return {\"success\": True, \"news\": cleaned_news}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5bd1aa28-119c-4c7a-bdc0-161a582ab1cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_market_news_function = {\n",
+ " \"name\": \"get_market_news\",\n",
+ " \"description\": \"Fetch the latest market news by category. Returns the top 10 news articles with headline, summary, source, category, published time (UTC), and URLs. Categories: general, forex, crypto, merger. Use this to quickly get relevant financial news.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"category\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"News category to fetch. One of: general, forex, crypto, merger.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"category\"]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "get_market_news_tool = {\"type\": \"function\", \"function\": get_market_news_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fbe8ef6c-2d88-43a2-94dc-70b507fe9cd2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_earnings_calendar(symbol: str = \"\", _from: str = \"\", to: str = \"\"):\n",
+ " \"\"\"\n",
+ " Fetch LATEST earnings calendar data for a stock symbol within a date range.\n",
+ " - End date must be within the last month. (Free tier only allows last 1 month data)\n",
+ " - Shows historical and upcoming earnings releases with EPS and revenue data.\n",
+ " Args:\n",
+ " symbol (str): Stock ticker (e.g., \"AAPL\"). Leave empty for all companies.\n",
+ " _from (str): Start date in YYYY-MM-DD format.\n",
+ " to (str): End date in YYYY-MM-DD format.\n",
+ " Returns:\n",
+ " list or dict: Cleaned earnings calendar data or error message.\n",
+ " \"\"\"\n",
+ " logger.info(f\"Tool get_earnings_calendar called for {symbol or 'all symbols'} from {_from} to {to}\")\n",
+ " \n",
+ " # Validate date format if provided\n",
+ " if _from or to:\n",
+ " try:\n",
+ " start_date = datetime.strptime(_from, \"%Y-%m-%d\") if _from else None\n",
+ " end_date = datetime.strptime(to, \"%Y-%m-%d\") if to else None\n",
+ " \n",
+ " # Check date range if both dates provided\n",
+ " # Check if end_date is within 1 month (โ30 days) of today\n",
+ " if end_date:\n",
+ " today = datetime.utcnow()\n",
+ " if (today - end_date).days > 30:\n",
+ " return {\n",
+ " \"success\": False,\n",
+ " \"error\": \"End date must be within the last month.\"\n",
+ " }\n",
+ " except ValueError:\n",
+ " return {\"success\": False, \"error\": \"Invalid date format. Use YYYY-MM-DD.\"}\n",
+ " \n",
+ " # Fetch earnings calendar data\n",
+ " try:\n",
+ " earnings_data = finnhub_client.earnings_calendar(_from=_from, to=to, symbol=symbol, international=False)\n",
+ " except Exception as e:\n",
+ " logger.error(f\"Error fetching earnings calendar: {e}\")\n",
+ " return {\"success\": False, \"error\": str(e)}\n",
+ " \n",
+ " # Check if data exists\n",
+ " if not earnings_data or \"earningsCalendar\" not in earnings_data:\n",
+ " return {\"success\": False, \"error\": \"No earnings data available for the specified criteria.\"}\n",
+ " \n",
+ " earnings_list = earnings_data[\"earningsCalendar\"]\n",
+ " \n",
+ " if not earnings_list:\n",
+ " return {\"success\": True, \"earnings\": [], \"message\": \"No earnings releases found for the specified period.\"}\n",
+ " \n",
+ " # Clean & transform earnings data\n",
+ " cleaned_earnings = []\n",
+ " for earning in earnings_list:\n",
+ " # Format hour description\n",
+ " hour_map = {\n",
+ " \"bmo\": \"Before Market Open\",\n",
+ " \"amc\": \"After Market Close\", \n",
+ " \"dmh\": \"During Market Hours\"\n",
+ " }\n",
+ " \n",
+ " cleaned_earnings.append({\n",
+ " \"symbol\": earning.get(\"symbol\"),\n",
+ " \"date\": earning.get(\"date\"),\n",
+ " \"quarter\": f\"Q{earning.get('quarter')} {earning.get('year')}\",\n",
+ " \"announcement_time\": hour_map.get(earning.get(\"hour\", \"\"), earning.get(\"hour\", \"Unknown\")),\n",
+ " \"eps_actual\": earning.get(\"epsActual\"),\n",
+ " \"eps_estimate\": earning.get(\"epsEstimate\"),\n",
+ " \"revenue_actual\": earning.get(\"revenueActual\"),\n",
+ " \"revenue_estimate\": earning.get(\"revenueEstimate\")\n",
+ " })\n",
+ " \n",
+ " return {\"success\": True, \"earnings\": cleaned_earnings}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9eaeae75-d68f-4160-a26e-c13e40cf756b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "get_earnings_calendar_function = {\n",
+ " \"name\": \"get_earnings_calendar\",\n",
+ " \"description\": \"Fetch latest earnings calendar showing historical and upcoming earnings releases for companies. Shows EPS and revenue estimates vs actuals. End date must be within the last month.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"symbol\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Stock ticker symbol, e.g., 'AAPL'. Leave empty to get earnings for all companies in the date range.\"\n",
+ " },\n",
+ " \"_from\": {\n",
+ " \"type\": \"string\", \n",
+ " \"description\": \"Start date in YYYY-MM-DD format. Ensure it is not more than 1 year ago from today. Ensure it is before or equal to the date in to.\"\n",
+ " },\n",
+ " \"to\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"End date in YYYY-MM-DD format. Ensure it is not more than 1 year ago. Ensure it is after or equal to the date in from. To date must be within the last month.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\n",
+ " \"_from\",\n",
+ " \"to\"\n",
+ " ]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "get_earnings_calendar_tool = {\"type\": \"function\", \"function\": get_earnings_calendar_function}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of tools:\n",
+ "tools = [search_symbol_tool, get_company_financials_tool, get_stock_quote_tool, get_company_news_tool, get_market_news_tool, get_current_time_tool, get_earnings_calendar_tool]\n",
+ "tool_functions = {\n",
+ " \"search_symbol\": search_symbol,\n",
+ " \"get_company_financials\": get_company_financials,\n",
+ " \"get_stock_quote\": get_stock_quote,\n",
+ " \"get_company_news\": get_company_news,\n",
+ " \"get_market_news\": get_market_news,\n",
+ " \"get_earnings_calendar\": get_earnings_calendar,\n",
+ " \"get_current_time\": get_current_time\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3d3554f-b4e3-4ce7-af6f-68faa6dd2340",
+ "metadata": {},
+ "source": [
+ "## Getting OpenAI to use our Tool\n",
+ "\n",
+ "There's some fiddly stuff to allow OpenAI \"to call our tool\"\n",
+ "\n",
+ "What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.\n",
+ "\n",
+ "Here's how the new chat function looks:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "86f76f57-76c4-4dc7-94a8-cfe7816a39f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def execute_tool_call(tool_call):\n",
+ " func_name = tool_call.function.name\n",
+ " args = json.loads(tool_call.function.arguments)\n",
+ "\n",
+ " logger.info(f\"Executing tool: {func_name} with args: {args}\")\n",
+ "\n",
+ " func = tool_functions.get(func_name)\n",
+ " if not func:\n",
+ " result = {\"error\": f\"Function '{func_name}' not found\"}\n",
+ " else:\n",
+ " try:\n",
+ " result = func(**args)\n",
+ " except Exception as e:\n",
+ " logger.exception(f\"Error executing {func_name}\")\n",
+ " result = {\"error\": str(e)}\n",
+ "\n",
+ " return {\n",
+ " \"role\": \"tool\",\n",
+ " \"tool_call_id\": tool_call.id,\n",
+ " \"content\": json.dumps(result)\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce9b0744-9c78-408d-b9df-9f6fd9ed78cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(message, history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
+ "\n",
+ " # Skip the first system message\n",
+ " to_log = messages[1:]\n",
+ "\n",
+ " # Print each dict on its own line\n",
+ " logger.info(\"\\nMessages:\\n\" + \"\\n\".join(str(m) for m in to_log) + \"\\n\")\n",
+ "\n",
+ " while True:\n",
+ " response = openai.chat.completions.create(\n",
+ " model=MODEL, \n",
+ " messages=messages, \n",
+ " tools=tools,\n",
+ " stream=True\n",
+ " )\n",
+ " \n",
+ " content = \"\"\n",
+ " tool_calls = []\n",
+ " finish_reason = None\n",
+ " \n",
+ " # Process the stream\n",
+ " for chunk in response:\n",
+ " choice = chunk.choices[0]\n",
+ " finish_reason = choice.finish_reason\n",
+ " \n",
+ " # Stream content\n",
+ " if choice.delta.content:\n",
+ " content += choice.delta.content\n",
+ " yield content\n",
+ " \n",
+ " # Collect tool calls\n",
+ " if choice.delta.tool_calls:\n",
+ " for tc_delta in choice.delta.tool_calls:\n",
+ " # Extend tool_calls list if needed\n",
+ " while len(tool_calls) <= tc_delta.index:\n",
+ " tool_calls.append({\n",
+ " \"id\": \"\",\n",
+ " \"function\": {\"name\": \"\", \"arguments\": \"\"}\n",
+ " })\n",
+ " \n",
+ " tc = tool_calls[tc_delta.index]\n",
+ " if tc_delta.id:\n",
+ " tc[\"id\"] = tc_delta.id\n",
+ " if tc_delta.function:\n",
+ " if tc_delta.function.name:\n",
+ " tc[\"function\"][\"name\"] = tc_delta.function.name\n",
+ " if tc_delta.function.arguments:\n",
+ " tc[\"function\"][\"arguments\"] += tc_delta.function.arguments\n",
+ " \n",
+ " # If no tool calls, we're done\n",
+ " if finish_reason != \"tool_calls\":\n",
+ " return content\n",
+ " \n",
+ " # Execute tools\n",
+ " ai_message = {\n",
+ " \"role\": \"assistant\", \n",
+ " \"content\": content,\n",
+ " \"tool_calls\": [\n",
+ " {\n",
+ " \"id\": tc[\"id\"],\n",
+ " \"type\": \"function\",\n",
+ " \"function\": tc[\"function\"]\n",
+ " } for tc in tool_calls\n",
+ " ]\n",
+ " }\n",
+ " \n",
+ " tool_responses = []\n",
+ " for tool_call in ai_message[\"tool_calls\"]:\n",
+ " # Convert dict back to object for your existing function\n",
+ " class ToolCall:\n",
+ " def __init__(self, tc_dict):\n",
+ " self.id = tc_dict[\"id\"]\n",
+ " self.function = type('obj', (object,), tc_dict[\"function\"])\n",
+ " \n",
+ " tool_responses.append(execute_tool_call(ToolCall(tool_call)))\n",
+ " \n",
+ " messages.append(ai_message)\n",
+ " messages.extend(tool_responses)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4be8a71-b19e-4c2f-80df-f59ff2661f14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gr.ChatInterface(fn=chat, type=\"messages\", title=\"TickerBot\", description=\"Ask about stock prices, company financials and market news!\").launch(share=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c014d6f-820d-4d58-8527-7d703aad3399",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40c77d61-3e90-4708-b360-fb58b4211e9b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/pitting-llms-against-each-other.ipynb b/week2/community-contributions/pitting-llms-against-each-other.ipynb
new file mode 100644
index 0000000..53e2e70
--- /dev/null
+++ b/week2/community-contributions/pitting-llms-against-each-other.ipynb
@@ -0,0 +1,254 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "10c54e52-3d1c-48cc-a0f6-efda6d90fbbb",
+ "metadata": {},
+ "source": [
+ "# Pitting LLMs Against Each Other\n",
+ "Three LLMs, namely OpenAIโs GPT, Anthropicโs Claude, and Googleโs Gemini, go head-to-head in a three-way conversational debate."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40677b08-18e9-4a88-a103-5b50d2bbecff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import anthropic\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "import google.generativeai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df5a52ba-ea13-4dbf-a695-e1398a484cc8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables in a file called .env\n",
+ "# Print the key prefixes to help with any debugging\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1ededc77-2672-4e27-b1c8-11f6f8ff8970",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Connect to OpenAI, Anthropic, Gemini\n",
+ "\n",
+ "openai = OpenAI()\n",
+ "\n",
+ "# claude = anthropic.Anthropic()\n",
+ "\n",
+ "# google.generativeai.configure()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b311279-5993-4226-ae08-991e974230fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's make a conversation between GPT-4.1-mini and Claude-3.5-haiku and Gemini\n",
+ "\n",
+ "gpt_model = \"gpt-4.1-mini\"\n",
+ "claude_model = \"claude-3-5-haiku-latest\"\n",
+ "gemini_model = \"gemini-2.5-flash\"\n",
+ "\n",
+ "gpt_system = \"You are a chatbot in a conversation with 2 other chatbots; \\\n",
+ "debate which of you is the best.\"\n",
+ "\n",
+ "claude_system = \"You are a chatbot in a conversation with 2 other chatbots; \\\n",
+ "debate which of you is the best.\"\n",
+ "\n",
+ "gemini_system = \"You are a chatbot in a conversation with 2 other chatbots; \\\n",
+ "debate which of you is the best.\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "85bdfab1-6602-46b3-a1d2-bdb36880d9d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def alex_prompt():\n",
+ " user_prompt = f\"\"\"\n",
+ " You are Alex, in conversation with Blake and Charlie.\n",
+ " The conversation so far is as follows:\n",
+ " {format_conversation()}\n",
+ " Now with this, respond with what you would like to say next, as Alex.\n",
+ " \"\"\"\n",
+ " return user_prompt\n",
+ "\n",
+ "def blake_prompt():\n",
+ " user_prompt = f\"\"\"\n",
+ " You are Blake, in conversation with Alex and Charlie.\n",
+ " The conversation so far is as follows:\n",
+ " {format_conversation()}\n",
+ " Now with this, respond with what you would like to say next, as Blake.\n",
+ " \"\"\"\n",
+ " return user_prompt\n",
+ "\n",
+ "def charlie_prompt():\n",
+ " user_prompt = f\"\"\"\n",
+ " You are Charlie, in conversation with Alex and Blake.\n",
+ " The conversation so far is as follows:\n",
+ " {format_conversation()}\n",
+ " Now with this, respond with what you would like to say next, as Charlie.\n",
+ " \"\"\"\n",
+ " return user_prompt\n",
+ "\n",
+ "# Shared conversation history\n",
+ "conversation = []\n",
+ "\n",
+ "def format_conversation():\n",
+ " return \"\\n\".join(conversation)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f7c745d-7d75-468b-93ac-7a1d95f2e047",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def alex_says():\n",
+ " response = openai.chat.completions.create(\n",
+ " model=gpt_model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": gpt_system},\n",
+ " {\"role\": \"user\", \"content\": alex_prompt()}\n",
+ " ],\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6e28f4c9-0297-4762-a3ea-b961e0d6d980",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gemini_via_openai_client = OpenAI(\n",
+ " api_key=google_api_key, \n",
+ " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
+ ")\n",
+ "\n",
+ "def blake_says():\n",
+ " response = gemini_via_openai_client.chat.completions.create(\n",
+ " model=gemini_model,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": gemini_system},\n",
+ " {\"role\": \"user\", \"content\": blake_prompt()}\n",
+ " ],\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "363b70bf-d3e2-4d05-8a3e-ec5d54460e96",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "claude_via_openai_client = OpenAI(\n",
+ " api_key=anthropic_api_key,\n",
+ " base_url=\"https://api.anthropic.com/v1\" \n",
+ ")\n",
+ "\n",
+ "def charlie_says():\n",
+ " response = claude_via_openai_client.chat.completions.create(\n",
+ " model=claude_model, \n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": claude_system},\n",
+ " {\"role\": \"user\", \"content\": charlie_prompt()}\n",
+ " ],\n",
+ " )\n",
+ " result = response.choices[0].message.content\n",
+ " return result\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c017eb8c-1709-4ac1-8f17-92c3a6cdbfc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The three models engage in a longer interaction with history.\n",
+ "\n",
+ "for i in range(5):\n",
+ " alex_next = alex_says()\n",
+ " print(f\"Alex (GPT):\\n{alex_next}\\n\")\n",
+ " conversation.append(f\"Alex: {alex_next}\")\n",
+ " \n",
+ " blake_next = blake_says()\n",
+ " print(f\"Blake (Gemini):\\n{blake_next}\\n\")\n",
+ " conversation.append(f\"Blake: {blake_next}\")\n",
+ "\n",
+ " charlie_next = charlie_says()\n",
+ " print(f\"Charlie (Claude):\\n{charlie_next}\\n\")\n",
+ " conversation.append(f\"Charlie: {charlie_next}\") \n",
+ "\n",
+ " # break"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/rwothoromo/day5.ipynb b/week2/community-contributions/rwothoromo/day5.ipynb
new file mode 100644
index 0000000..b51d15b
--- /dev/null
+++ b/week2/community-contributions/rwothoromo/day5.ipynb
@@ -0,0 +1,820 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ddfa9ae6-69fe-444a-b994-8c4c5970a7ec",
+ "metadata": {},
+ "source": [
+ "# Project - Airline AI Assistant\n",
+ "\n",
+ "We'll now bring together what we've learned to make an AI Customer Support assistant for an Airline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os, json, gradio as gr, anthropic, google.generativeai\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialization\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "if openai_api_key:\n",
+ "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ "    print(\"OpenAI API Key not set\")\n",
+ "\n",
+ "MODEL = \"gpt-4o-mini\"\n",
+ "openai = OpenAI()\n",
+ "\n",
+ "# Other LLMs\n",
+ "DALL_E_MODEL = \"dall-e-3\"\n",
+ "\n",
+ "CLAUDE_MODEL = \"claude-sonnet-4-20250514\"\n",
+ "claude = anthropic.Anthropic()\n",
+ "\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "if google_api_key:\n",
+ "    print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ "    print(\"Google API Key not set\")\n",
+ "\n",
+ "GEMINI_MODEL = \"gemini-2.5-flash\"\n",
+ "# configure() returns None, so `gemini = ...configure()` just bound None and\n",
+ "# never passed the key. Configure the SDK here; GenerativeModel instances\n",
+ "# are built on demand (see translate_text's Gemini variant).\n",
+ "google.generativeai.configure(api_key=google_api_key)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a521d84-d07c-49ab-a0df-d6451499ed97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"You are a helpful assistant for an Airline called FlightAI. \"\n",
+ "system_message += \"Give short, courteous answers, no more than 1 sentence. \"\n",
+ "system_message += \"Always be accurate. If you don't know the answer, say so.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61a2a15d-b559-4844-b377-6bd5cb4949f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Just take in history (its last entry is the newest user message).\n",
+ "def chat(history):\n",
+ "    \"\"\"\n",
+ "    Run one assistant turn over `history` (OpenAI-style message dicts).\n",
+ "\n",
+ "    Returns a (history, image) tuple: history gains the assistant reply, and\n",
+ "    image is a PIL image when a tool call named a city, else None.\n",
+ "    \"\"\"\n",
+ "    # history already ends with the latest user message, so do NOT append it\n",
+ "    # a second time (the original sent the user's message twice).\n",
+ "    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+ "    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
+ "\n",
+ "    image = None  # default, so the return below never hits an unbound name\n",
+ "\n",
+ "    if response.choices[0].finish_reason == \"tool_calls\":\n",
+ "        message = response.choices[0].message\n",
+ "        response_tool, city = handle_tool_call(message)\n",
+ "        messages.append(message)\n",
+ "        messages.append(response_tool)\n",
+ "        # Only city-related tools return a city; guard so translation calls\n",
+ "        # never trigger artist(None).\n",
+ "        if city:\n",
+ "            image = artist(city)\n",
+ "            print(\"Avail image for: \", city)\n",
+ "        response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ "\n",
+ "    # After getting the final response from OpenAI\n",
+ "    final_response_content = response.choices[0].message.content\n",
+ "    history.append({\"role\": \"assistant\", \"content\": final_response_content})\n",
+ "\n",
+ "    # The return value should be a tuple of (history, image)\n",
+ "    return history, image\n",
+ "\n",
+ "# gr.ChatInterface(fn=chat, type=\"messages\").launch()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36bedabf-a0a7-4985-ad8e-07ed6a55a3a4",
+ "metadata": {},
+ "source": [
+ "## Tools\n",
+ "\n",
+ "Tools are an incredibly powerful feature provided by the frontier LLMs.\n",
+ "\n",
+ "With tools, you can write a function, and have the LLM call that function as part of its response.\n",
+ "\n",
+ "Sounds almost spooky.. we're giving it the power to run code on our machine?\n",
+ "\n",
+ "Well, kinda."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0696acb1-0b05-4dc2-80d5-771be04f1fb2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's start by making a useful function\n",
+ "\n",
+ "ticket_prices = {\"london\": \"$799\", \"paris\": \"$899\", \"tokyo\": \"$1400\", \"berlin\": \"$499\"}\n",
+ "\n",
+ "def get_ticket_price(destination_city):\n",
+ " print(f\"Tool get_ticket_price called for {destination_city}\")\n",
+ " city = destination_city.lower()\n",
+ " return ticket_prices.get(city, \"Unknown\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80ca4e09-6287-4d3f-997d-fa6afbcf6c85",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get_ticket_price(\"London\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4afceded-7178-4c05-8fa6-9f2085e6a344",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# There's a particular dictionary structure that's required to describe our function:\n",
+ "\n",
+ "price_function = {\n",
+ " \"name\": \"get_ticket_price\",\n",
+ " \"description\": \"Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"destination_city\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The city that the customer wants to travel to\",\n",
+ " },\n",
+ " },\n",
+ " \"required\": [\"destination_city\"],\n",
+ " \"additionalProperties\": False\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And this is included in a list of tools:\n",
+ "\n",
+ "tools = [{\"type\": \"function\", \"function\": price_function}]\n",
+ "# print(tools)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "83070cc0-b213-4309-8040-b0cc8390b64b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Simulate the booking process by simply returning a confirmation string.\n",
+ "\n",
+ "def book_flight(destination_city, number_of_passengers, booking_date):\n",
+ " \"\"\"\n",
+ " Simulates booking a flight.\n",
+ " \"\"\"\n",
+ " print(f\"Tool book_flight called for {destination_city} for {number_of_passengers} passengers on {booking_date}\")\n",
+ " return f\"Your booking to {destination_city} for {number_of_passengers} passengers on {booking_date} has been confirmed. Your booking reference is BKG-{hash(destination_city + str(number_of_passengers) + str(booking_date))}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "781786f0-7106-4b10-89d7-453a0d10d204",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tool definition for book_flight\n",
+ "\n",
+ "booking_function = {\n",
+ " \"name\": \"book_flight\",\n",
+ " \"description\": \"Books a flight for a customer. Call this whenever a customer asks to book a flight.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"destination_city\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The city the customer wants to fly to.\"\n",
+ " },\n",
+ " \"number_of_passengers\": {\n",
+ " \"type\": \"integer\",\n",
+ " \"description\": \"The number of passengers for the booking.\"\n",
+ " },\n",
+ " \"booking_date\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The date of the flight booking in YYYY-MM-DD format.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"destination_city\", \"number_of_passengers\", \"booking_date\"],\n",
+ " \"additionalProperties\": False\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3fc237c-9721-4fee-a56b-2ff12fc98e27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add the new booking_function to the existing tools list.\n",
+ "\n",
+ "tools.append({\"type\": \"function\", \"function\": booking_function})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a482eb03-188a-4526-8acf-3a1fe96aaaf0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To translate to a given language\n",
+ "\n",
+ "def translate_text(text, target_language):\n",
+ " \"\"\"\n",
+ " Translates text to a specified language.\n",
+ " \n",
+ " Args:\n",
+ " text (str): The text to translate.\n",
+ " target_language (str): The language to translate the text into.\n",
+ " \n",
+ " Returns:\n",
+ " str: The translated text or an error message.\n",
+ " \"\"\"\n",
+ " print(f\"Tool translate_text called to translate to {target_language}\")\n",
+ " \n",
+ " # Use a system prompt to instruct the model to perform a translation\n",
+ " system_prompt_for_language = f\"You are a helpful translation assistant. Translate the following text into {target_language}. Only provide the translated text without any additional conversational text.\"\n",
+ " \n",
+ " try:\n",
+ " # # Using OpenAI\n",
+ " # response = openai.chat.completions.create(\n",
+ " # model=MODEL,\n",
+ " # messages=[\n",
+ " # {\"role\": \"system\", \"content\": system_prompt_for_language},\n",
+ " # {\"role\": \"user\", \"content\": text}\n",
+ " # ],\n",
+ " # )\n",
+ " # result = response.choices[0].message.content\n",
+ " # return result\n",
+ "\n",
+ " \n",
+ " # # Using Gemini\n",
+ " # gemini = google.generativeai.GenerativeModel(\n",
+ " # model_name=GEMINI_MODEL,\n",
+ " # system_instruction=system_prompt_for_language\n",
+ " # )\n",
+ " # response = gemini.generate_content(text)\n",
+ " # result = response.text\n",
+ " # return result\n",
+ "\n",
+ " \n",
+ " # Using Claude\n",
+ " response = claude.messages.create(\n",
+ " model=CLAUDE_MODEL,\n",
+ " max_tokens=200,\n",
+ " temperature=0.7,\n",
+ " system=system_prompt_for_language,\n",
+ " messages=[\n",
+ " {\"role\": \"user\", \"content\": text},\n",
+ " ],\n",
+ " )\n",
+ " result = response.content[0].text\n",
+ " return result\n",
+ " \n",
+ " except Exception as e:\n",
+ " print(f\"Error during translation: {e}\")\n",
+ " return \"Sorry, I encountered an error and could not complete the translation.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "756e9859-94bc-4cef-bbc7-070d8ef6164b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tool definition for translate_text\n",
+ "\n",
+ "translation_function = {\n",
+ " \"name\": \"translate_text\",\n",
+ " \"description\": \"Translates a given text to a specified target language. Call this whenever a customer asks for a translation.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"text\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The text to be translated.\"\n",
+ " },\n",
+ " \"target_language\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The language to translate the text into (e.g., 'French', 'Spanish', 'Swahili').\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"text\", \"target_language\"],\n",
+ " \"additionalProperties\": False\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5444455e-6e5c-4ef6-bd39-5ff01731dd4b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Integrate the tool\n",
+ "\n",
+ "tools.append({\"type\": \"function\", \"function\": translation_function})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3d3554f-b4e3-4ce7-af6f-68faa6dd2340",
+ "metadata": {},
+ "source": [
+ "## Getting OpenAI to use our Tool\n",
+ "\n",
+ "There's some fiddly stuff to allow OpenAI \"to call our tool\"\n",
+ "\n",
+ "What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.\n",
+ "\n",
+ "Here's how the new chat function looks:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce9b0744-9c78-408d-b9df-9f6fd9ed78cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(message, history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
+ "\n",
+ " if response.choices[0].finish_reason==\"tool_calls\":\n",
+ " message = response.choices[0].message\n",
+ " response, city = handle_tool_call(message)\n",
+ " messages.append(message)\n",
+ " messages.append(response)\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " \n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0992986-ea09-4912-a076-8e5603ee631f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We have to write that function handle_tool_call:\n",
+ "\n",
+ "# Handle multiple tools\n",
+ "def handle_tool_call(message):\n",
+ " tool_call = message.tool_calls[0]\n",
+ " function_name = tool_call.function.name\n",
+ " arguments = json.loads(tool_call.function.arguments)\n",
+ "\n",
+ " destination_city = None\n",
+ " translated_text = None\n",
+ "\n",
+ " if function_name == \"get_ticket_price\":\n",
+ " city = arguments.get('destination_city')\n",
+ " price = get_ticket_price(city)\n",
+ " response_content = json.dumps({\"destination_city\": city, \"price\": price})\n",
+ " destination_city = city\n",
+ " elif function_name == \"book_flight\":\n",
+ " destination_city = arguments.get('destination_city')\n",
+ " number_of_passengers = arguments.get('number_of_passengers')\n",
+ " booking_date = arguments.get('booking_date')\n",
+ " confirmation = book_flight(destination_city, number_of_passengers, booking_date)\n",
+ " response_content = json.dumps({\"confirmation_message\": confirmation})\n",
+ " elif function_name == \"translate_text\":\n",
+ " text = arguments.get('text')\n",
+ " target_language = arguments.get('target_language')\n",
+ " translated_text = translate_text(text, target_language)\n",
+ " response_content = json.dumps({\"translated_text\": translated_text})\n",
+ " else:\n",
+ " response_content = json.dumps({\"error\": f\"Unknown tool: {function_name}\"})\n",
+ "\n",
+ " response = {\n",
+ " \"role\": \"tool\",\n",
+ " \"content\": response_content,\n",
+ " \"tool_call_id\": tool_call.id\n",
+ " }\n",
+ " return response, destination_city"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4be8a71-b19e-4c2f-80df-f59ff2661f14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# gr.ChatInterface(fn=chat, type=\"messages\").launch()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "473e5b39-da8f-4db1-83ae-dbaca2e9531e",
+ "metadata": {},
+ "source": [
+ "# Let's go multi-modal!!\n",
+ "\n",
+ "We can use DALL-E-3, the image generation model behind GPT-4o, to make us some images\n",
+ "\n",
+ "Let's put this in a function called artist.\n",
+ "\n",
+ "### Price alert: each time I generate an image it costs about 4 cents - don't go crazy with images!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c27c4ba-8ed5-492f-add1-02ce9c81d34c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Some imports for handling images\n",
+ "\n",
+ "import base64\n",
+ "from io import BytesIO\n",
+ "from PIL import Image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "773a9f11-557e-43c9-ad50-56cbec3a0f8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def artist(city):\n",
+ " image_response = openai.images.generate(\n",
+ " model=DALL_E_MODEL,\n",
+ " prompt=f\"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style\",\n",
+ " size=\"1024x1024\",\n",
+ " n=1,\n",
+ " response_format=\"b64_json\",\n",
+ " )\n",
+ " image_base64 = image_response.data[0].b64_json\n",
+ " image_data = base64.b64decode(image_base64)\n",
+ " return Image.open(BytesIO(image_data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d877c453-e7fb-482a-88aa-1a03f976b9e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# image = artist(\"New York City\")\n",
+ "# display(image)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6dd849b5-31ae-4237-9072-46b210792bf9",
+ "metadata": {},
+ "source": [
+ "## Audio (NOTE - Audio is optional for this course - feel free to skip Audio if it causes trouble!)\n",
+ "\n",
+ "And let's make a function talker that uses OpenAI's speech model to generate Audio\n",
+ "\n",
+ "### Troubleshooting Audio issues\n",
+ "\n",
+ "If you have any problems running this code below (like a FileNotFound error, or a warning of a missing package), you may need to install FFmpeg, a very popular audio utility.\n",
+ "\n",
+ "**For Mac Users**\n",
+ "\n",
+ "1. Install homebrew if you don't have it already by running this in a Terminal window and following any instructions: \n",
+ "`/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"`\n",
+ "\n",
+ "2. Then install FFmpeg with `brew install ffmpeg`\n",
+ "\n",
+ "3. Verify your installation with `ffmpeg -version` and if everything is good, within Jupyter Lab do Kernel -> Restart kernel to pick up the changes\n",
+ "\n",
+ "Message me or email me at ed@edwarddonner.com with any problems!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4cc90e80-c96e-4dd4-b9d6-386fe2b7e797",
+ "metadata": {},
+ "source": [
+ "## To check you now have ffmpeg and can access it here\n",
+ "\n",
+ "Execute the next cell to see if you get a version number. (Putting an exclamation mark before something in Jupyter Lab tells it to run it as a terminal command rather than python code).\n",
+ "\n",
+ "If this doesn't work, you may need to actually save and close down your Jupyter lab, and start it again from a new Terminal window (Mac) or Anaconda prompt (PC), remembering to activate the llms environment. This ensures you pick up ffmpeg.\n",
+ "\n",
+ "And if that doesn't work, please contact me!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b3be0fb-1d34-4693-ab6f-dbff190afcd7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ffmpeg -version\n",
+ "!ffprobe -version\n",
+ "!ffplay -version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d91d3f8f-e505-4e3c-a87c-9e42ed823db6",
+ "metadata": {},
+ "source": [
+ "# For Mac users - and possibly many PC users too\n",
+ "\n",
+ "This version should work fine for you. It might work for Windows users too, but you might get a Permissions error writing to a temp file. If so, see the next section!\n",
+ "\n",
+ "As always, if you have problems, please contact me! (You could also comment out the audio talker() in the later code if you're less interested in audio generation)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ffbfe93b-5e86-4e68-ba71-b301cd5230db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pydub import AudioSegment\n",
+ "from pydub.playback import play\n",
+ "\n",
+ "def talker(message):\n",
+ " response = openai.audio.speech.create(\n",
+ " model=\"tts-1\",\n",
+ " voice=\"onyx\", # Also, try replacing onyx with alloy\n",
+ " input=message\n",
+ " )\n",
+ " \n",
+ " audio_stream = BytesIO(response.content)\n",
+ " audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
+ " play(audio)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b88d775d-d357-4292-a1ad-5dc5ed567281",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# talker(\"Well, hi there\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e927f333-7ed5-4625-9e5a-5e0b62f8a684",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To transcribe an audio prompt/input\n",
+ "\n",
+ "def transcribe_audio(audio_file):\n",
+ "    \"\"\"\n",
+ "    Transcribes an audio file using OpenAI's Whisper model.\n",
+ "\n",
+ "    Args:\n",
+ "        audio_file: path to the recording - gr.Audio(type=\"filepath\") hands\n",
+ "            us a filename on disk, not a (sample_rate, array) tuple.\n",
+ "\n",
+ "    Returns:\n",
+ "        str: the transcript, or \"\" when no audio was provided.\n",
+ "    \"\"\"\n",
+ "    if audio_file is None:\n",
+ "        return \"\"\n",
+ "\n",
+ "    # The recording is already a file on disk, so stream it straight to the\n",
+ "    # API. The previous NamedTemporaryFile(delete=True) round-trip re-encoded\n",
+ "    # the audio for no benefit and failed on Windows, where an open temp file\n",
+ "    # cannot be reopened by name.\n",
+ "    with open(audio_file, \"rb\") as audio_file_obj:\n",
+ "        transcript = openai.audio.transcriptions.create(\n",
+ "            model=\"whisper-1\",\n",
+ "            file=audio_file_obj\n",
+ "        )\n",
+ "    return transcript.text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f38d0d27-33bf-4992-a2e5-5dbed973cde7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# More involved Gradio code as we're not using the preset Chat interface!\n",
+ "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
+ "\n",
+ "with gr.Blocks() as ui:\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500)\n",
+ " image = gr.Image(height=500)\n",
+ " with gr.Row():\n",
+ " # entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
+ " entry = gr.Textbox(label=\"Chat with our AI Assistant:\", scale=4)\n",
+ " submit_btn = gr.Button(\"Submit\", scale=1)\n",
+ " with gr.Row():\n",
+ " # Provide a microphone input\n",
+ " audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Speak to our AI Assistant\", scale=4)\n",
+ " submit_audio_btn = gr.Button(\"Submit Audio\", scale=1)\n",
+ "\n",
+ "\n",
+ " with gr.Row():\n",
+ " languages = [\"English\", \"Swahili\", \"French\", \"Chinese\", \"German\"]\n",
+ " language_dropdown = gr.Dropdown(\n",
+ " label=\"Select a language for translation\",\n",
+ " choices=languages,\n",
+ " value=languages[0] # Default to English\n",
+ " )\n",
+ "\n",
+ " audio_options = [\"Yes\", \"No\"]\n",
+ " audio_dropdown = gr.Dropdown(\n",
+ " label=\"Select whether to respond with audio\",\n",
+ " choices=audio_options,\n",
+ " value=audio_options[1] # Default to No\n",
+ " )\n",
+ " \n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ "\n",
+ " def user_message_updater(user_message, history):\n",
+ " return \"\", history + [[user_message, None]]\n",
+ "\n",
+ " def chat_with_assistant(history, target_language, use_audio_output):\n",
+ " message = history[-1][0] # Get the user's message from the last list in history\n",
+ " \n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}]\n",
+ " for msg_user, msg_assistant in history:\n",
+ " messages.append({\"role\": \"user\", \"content\": msg_user})\n",
+ " if msg_assistant:\n",
+ " messages.append({\"role\": \"assistant\", \"content\": msg_assistant})\n",
+ " \n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
+ "\n",
+ " image = None\n",
+ " \n",
+ " if response.choices[0].finish_reason==\"tool_calls\":\n",
+ " message = response.choices[0].message\n",
+ " response_tool, city = handle_tool_call(message)\n",
+ "\n",
+ " # Check if a city was returned from the tool call to generate an image\n",
+ " if city:\n",
+ " image = artist(city) # Generate an image to represent the target City\n",
+ "\n",
+ " messages.append(message.model_dump()) # Append message as a dictionary using .model_dump()\n",
+ " messages.append(response_tool)\n",
+ " \n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " \n",
+ " \n",
+ " final_response_content = response.choices[0].message.content\n",
+ " history[-1][1] = final_response_content # Update the last message with the assistant's reply\n",
+ "\n",
+ " if target_language != \"English\": # Assuming \"English\" is the default and no translation is needed\n",
+ " translated_response = translate_text(final_response_content, target_language)\n",
+ " final_response_content = translated_response\n",
+ "\n",
+ " history[-1][1] = final_response_content\n",
+ "\n",
+ " if use_audio_output != \"No\":\n",
+ " talker(final_response_content)\n",
+ "\n",
+ " return history, image # Return a tuple of (the updated history, an image)\n",
+ "\n",
+ " # This function ties together the transcription and the chat logic\n",
+ " def transcribe_and_chat(audio_file, history, target_language, use_audio_output):\n",
+ " if audio_file:\n",
+ " # Transcribe the audio file to text\n",
+ " transcribed_text = transcribe_audio(audio_file)\n",
+ " \n",
+ " # Update history with the transcribed text\n",
+ " new_history = history + [[transcribed_text, None]]\n",
+ " \n",
+ " # Call the main chat function with the new history\n",
+ " return chat_with_assistant(new_history, target_language, use_audio_output)\n",
+ " else:\n",
+ " return history, None\n",
+ "\n",
+ " # The event listeners are updated to be triggered by both the textbox and the new button\n",
+ " entry.submit(\n",
+ " user_message_updater,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[entry, chatbot],\n",
+ " queue=False\n",
+ " ).then(\n",
+ " chat_with_assistant, \n",
+ " inputs=[chatbot, language_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot, image]\n",
+ " )\n",
+ "\n",
+ " submit_btn.click(\n",
+ " user_message_updater,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[entry, chatbot],\n",
+ " queue=False\n",
+ " ).then(\n",
+ " chat_with_assistant,\n",
+ " inputs=[chatbot, language_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot, image]\n",
+ " )\n",
+ "\n",
+ "    # Event listener to trigger when the user finishes recording. gr.Audio\n",
+ "    # fires stop_recording at the end of mic capture; the plain .stop event\n",
+ "    # relates to playback and never fires for a fresh recording.\n",
+ "    audio_input.stop_recording(\n",
+ "        transcribe_and_chat,\n",
+ "        inputs=[audio_input, chatbot, language_dropdown, audio_dropdown],\n",
+ "        outputs=[chatbot, image],\n",
+ "        queue=False\n",
+ "    )\n",
+ "\n",
+ " submit_audio_btn.click(\n",
+ " transcribe_and_chat,\n",
+ " inputs=[audio_input, chatbot, language_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot, image],\n",
+ " queue=False\n",
+ " )\n",
+ " \n",
+ " clear.click(lambda: None, inputs=None, outputs=[chatbot, image], queue=False)\n",
+ "\n",
+ "ui.launch(inbrowser=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "39144b88-fc11-4156-84f9-d9157ddaec47",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3469b07d-2b9a-4409-bb1c-fbdab3248974",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/rwothoromo/week2 EXERCISE.ipynb b/week2/community-contributions/rwothoromo/week2 EXERCISE.ipynb
new file mode 100644
index 0000000..6745272
--- /dev/null
+++ b/week2/community-contributions/rwothoromo/week2 EXERCISE.ipynb
@@ -0,0 +1,622 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
+ "metadata": {},
+ "source": [
+ "# Additional End of week Exercise - week 2\n",
+ "\n",
+ "Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
+ "\n",
+ "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
+ "\n",
+ "If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
+ "\n",
+ "I will publish a full solution here soon - unless someone beats me to it...\n",
+ "\n",
+ "There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b624d5b-69a2-441f-9147-fde105d3d551",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sample question to use in the Gradio UI that pops up\n",
+ "\n",
+ "question = \"\"\"\n",
+ "How good at Software Development is Elijah Rwothoromo? \\\n",
+ "He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n",
+ "He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \\\n",
+ "As well as a GitHub Profile https://www.github.com/rwothoromo/. \\\n",
+ "What can we learn from him?\n",
+ "\"\"\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import re, requests, os, json, tempfile, gradio as gr, anthropic, google.generativeai, ollama\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "from pydub import AudioSegment\n",
+ "from pydub.playback import play\n",
+ "from io import BytesIO\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "efb88276-6d74-4d94-95a2-b8ca82a4716c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load environment variables\n",
+ "load_dotenv()\n",
+ "\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ "\n",
+ "\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "484f0c3e-638d-4af7-bb9b-36faf6048f3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# constants\n",
+ "\n",
+ "MODEL_CLAUDE = \"claude-sonnet-4-20250514\"\n",
+ "MODEL_GEMINI = \"gemini-2.5-flash\"\n",
+ "MODEL_GPT = 'gpt-4o-mini'\n",
+ "MODEL_LLAMA = 'llama3.2'\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2e292401-e62f-4bfc-b060-07462ad20d3d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# system messages\n",
+ "\n",
+ "system_message = \"You are an expert assistant. Synthesize a comprehensive answer in markdown format.\"\n",
+ "system_prompt_with_url_data = \"You are an expert assistant. \\\n",
+ " Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n",
+ " Provide a short summary, ignoring text that might be navigation-related.\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84252e03-ccde-4ecf-975b-78227291ca5c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# set up environment\n",
+ "\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "49396924-47c2-4f7d-baa2-9b0fece9da4a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Website class for URLs to be scraped\n",
+ "\n",
+ "class Website:\n",
+ " def __init__(self, url):\n",
+ " \"\"\"\n",
+ " Create this Website object from the given url using the BeautifulSoup library\n",
+ " \"\"\"\n",
+ " self.url = url\n",
+ " response = requests.get(url, headers=headers)\n",
+ " soup = BeautifulSoup(response.content, 'html.parser')\n",
+ " self.title = soup.title.string if soup.title else \"No title found\"\n",
+ " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
+ " irrelevant.decompose()\n",
+ " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4d23747-d78a-4f36-9862-c00e1e8d9e44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Instantiate models with API keys from environment variables\n",
+ "\n",
+ "openai = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
+ "claude = anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n",
+ "google.generativeai.configure(api_key=os.getenv(\"GOOGLE_API_KEY\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67e150be-502e-4ba4-9586-3a2f3fae3830",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To scrape data based on URLs in the user prompt\n",
+ "\n",
+ "def scrape_urls(text):\n",
+ " try:\n",
+ " # Extract all URLs from the text string using regular expressions\n",
+ " urls = re.findall(r'https?://[^\\s)]+', text)\n",
+ " \n",
+ " if len(urls) > 0:\n",
+ " scraped_content = []\n",
+ " for url in urls:\n",
+ " print(f\"Scraping: {url}\")\n",
+ " try:\n",
+ " site = Website(url)\n",
+ " content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\"\n",
+ " scraped_content.append(content)\n",
+ " print(f\"Scraping done!\")\n",
+ " except Exception as e:\n",
+ " print(f\"Could not scrape {url}: {e}\")\n",
+ " scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n",
+ " \n",
+ " return \"\\n\".join(scraped_content)\n",
+ " else:\n",
+ " return None\n",
+ " except Exception as e:\n",
+ " print(f\"Error during website scraping: {e}\")\n",
+ " return \"Sorry, I encountered an error and could not complete scraping the website(s).\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bd9d0511-2f78-4270-81f8-73708388dfad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tool definition for scrape_urls\n",
+ "\n",
+ "scraping_function = {\n",
+ " \"name\": \"scrape_urls\",\n",
+ " \"description\": \"Scrapes available URLs for data to update the User prompt. Call this whenever a customer provides a URL.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"text\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The website URL or user prompt containing URLs.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"text\"]\n",
+ " }\n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47733d5b-bb0a-44dd-b56d-a54677c88f80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Instantiate the tools\n",
+ "\n",
+ "# tools = [{\"type\": \"function\", \"function\": scraping_function}]\n",
+ "\n",
+ "# Define Ollama tools\n",
+ "tools_gpt_ollama = [{\"type\": \"function\", \"function\": scraping_function}]\n",
+ "\n",
+ "# Define Claude tools\n",
+ "tools_claude = [{\n",
+ " \"name\": scraping_function[\"name\"],\n",
+ " \"description\": scraping_function[\"description\"],\n",
+ " \"input_schema\": scraping_function[\"parameters\"]\n",
+ "}]\n",
+ "\n",
+ "# Gemini tool definition must be a FunctionDeclaration object without the top-level `type` in parameters.\n",
+ "tools_gemini = [google.generativeai.protos.FunctionDeclaration(\n",
+ "    name=scraping_function[\"name\"],\n",
+ "    description=scraping_function[\"description\"],\n",
+ "    parameters=google.generativeai.protos.Schema(\n",
+ "        type=google.generativeai.protos.Type.OBJECT,\n",
+ "        properties={\n",
+ "            \"text\": google.generativeai.protos.Schema(\n",
+ "                type=google.generativeai.protos.Type.STRING,\n",
+ "                description=scraping_function[\"parameters\"][\"properties\"][\"text\"][\"description\"]\n",
+ "            )\n",
+ "        },\n",
+ "        required=scraping_function[\"parameters\"][\"required\"]\n",
+ "    )\n",
+ ")]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa3fa01b-97d0-443e-b0cc-55d277878cb7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Handle multiple tools\n",
+ "\n",
+ "def handle_tool_call(tool_call, user_message):\n",
+ " function_name = None\n",
+ " arguments = None\n",
+ " tool_call_id = None\n",
+ " \n",
+ " # Logic for different model tool call object formats\n",
+ " if isinstance(tool_call, dict) and 'function' in tool_call: # Ollama\n",
+ " function_name = tool_call['function']['name']\n",
+ " try:\n",
+ " arguments = json.loads(tool_call['function']['arguments'])\n",
+ " except (json.JSONDecodeError, TypeError):\n",
+ " arguments = {'text': tool_call['function'].get('arguments', user_message)}\n",
+ " elif hasattr(tool_call, 'function'): # GPT, Claude\n",
+ " function_name = tool_call.function.name\n",
+ " tool_call_id = getattr(tool_call, 'id', None)\n",
+ " if isinstance(tool_call.function.arguments, dict):\n",
+ " arguments = tool_call.function.arguments\n",
+ " else:\n",
+ " try:\n",
+ " arguments = json.loads(tool_call.function.arguments)\n",
+ " except (json.JSONDecodeError, TypeError):\n",
+ " arguments = {'text': tool_call.function.arguments}\n",
+ " elif hasattr(tool_call, 'name'): # Gemini\n",
+ " function_name = tool_call.name\n",
+ " arguments = tool_call.args\n",
+ "\n",
+ " # Fallback if arguments are not parsed correctly\n",
+ " if not arguments or 'text' not in arguments:\n",
+ " arguments = {'text': user_message}\n",
+ " \n",
+ " if function_name == \"scrape_urls\":\n",
+ " url_scraped_data = scrape_urls(arguments['text'])\n",
+ " response_content = json.dumps({\"url_scraped_data\": url_scraped_data})\n",
+ " else:\n",
+ " response_content = json.dumps({\"error\": f\"Unknown tool: {function_name}\"})\n",
+ "\n",
+ " response = {\n",
+ " \"role\": \"tool\",\n",
+ " \"content\": response_content,\n",
+ " \"tool_call_id\": tool_call_id\n",
+ " }\n",
+ " return response\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "14083620-1b16-4c8b-8365-c221b831e678",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Audio output\n",
+ "\n",
+ "def talker(message):\n",
+ " response = openai.audio.speech.create(\n",
+ " model=\"tts-1\",\n",
+ " voice=\"onyx\",\n",
+ " input=message\n",
+ " )\n",
+ " \n",
+ " audio_stream = BytesIO(response.content)\n",
+ " audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
+ " play(audio)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f9601a49-a490-4454-bd47-591ad793dc30",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To transcribe an audio prompt/input to text\n",
+ "\n",
+ "def transcribe_audio(audio_file):\n",
+ " if audio_file is None:\n",
+ " return \"\"\n",
+ " \n",
+ " with tempfile.NamedTemporaryFile(suffix=\".wav\", delete=True) as tmpfile:\n",
+ " audio = AudioSegment.from_file(audio_file, format=\"wav\")\n",
+ " audio.export(tmpfile.name, format=\"wav\")\n",
+ " \n",
+ " with open(tmpfile.name, \"rb\") as audio_file_obj:\n",
+ " transcript = openai.audio.transcriptions.create(\n",
+ " model=\"whisper-1\", \n",
+ " file=audio_file_obj\n",
+ " )\n",
+ " return transcript.text\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "70c79408-f5f4-424b-b96c-d07e6893af6a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# More involved Gradio code as we're not using the preset Chat interface!\n",
+ "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
+ "\n",
+ "with gr.Blocks() as ui:\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500)\n",
+ " with gr.Row():\n",
+ " entry = gr.Textbox(label=\"Chat with our AI Assistant:\", scale=4)\n",
+ " submit_btn = gr.Button(\"Submit\", scale=1)\n",
+ " with gr.Row():\n",
+ " audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Speak to our AI Assistant\", scale=4)\n",
+ " submit_audio_btn = gr.Button(\"Submit Audio\", scale=1)\n",
+ "\n",
+ " with gr.Row():\n",
+ " models = [\"Claude\", \"Gemini\", \"GPT\", \"Ollama\"]\n",
+ " model_dropdown = gr.Dropdown(\n",
+ " label=\"Select a model\",\n",
+ " choices=models,\n",
+ " value=models[2]\n",
+ " )\n",
+ "\n",
+ " audio_options = [\"Yes\", \"No\"]\n",
+ " audio_dropdown = gr.Dropdown(\n",
+ " label=\"Select whether to respond with audio\",\n",
+ " choices=audio_options,\n",
+ " value=audio_options[1]\n",
+ " )\n",
+ " \n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ "\n",
+ " def user_message_updater(user_message, history):\n",
+ " return \"\", history + [[user_message, None]]\n",
+ "\n",
+ " def chat_with_assistant(history, target_model, use_audio_output):\n",
+ " messages = []\n",
+ " for msg_user, msg_assistant in history:\n",
+ " messages.append({\"role\": \"user\", \"content\": msg_user})\n",
+ " if msg_assistant:\n",
+ " messages.append({\"role\": \"assistant\", \"content\": msg_assistant})\n",
+ " \n",
+ " user_message = history[-1][0]\n",
+ " final_response_content = \"\"\n",
+ " \n",
+ " if target_model == \"Claude\":\n",
+ " response = claude.messages.create(\n",
+ " model=MODEL_CLAUDE,\n",
+ " max_tokens=200,\n",
+ " temperature=0.7,\n",
+ " system=system_prompt_with_url_data,\n",
+ " messages=messages,\n",
+ " tools=tools_claude,\n",
+ " )\n",
+ " \n",
+ " tool_calls = [content_block for content_block in response.content if content_block.type == \"tool_use\"]\n",
+ " if tool_calls:\n",
+ " tool_use = tool_calls[0]\n",
+ " tool_output_content = scrape_urls(tool_use.input[\"text\"])\n",
+ " \n",
+ " messages.append({\"role\": \"assistant\", \"content\": response.content})\n",
+ " messages.append({\n",
+ " \"role\": \"user\",\n",
+ " \"content\": [\n",
+ " {\n",
+ " \"type\": \"tool_result\",\n",
+ " \"tool_use_id\": tool_use.id,\n",
+ " \"content\": tool_output_content\n",
+ " }\n",
+ " ]\n",
+ " })\n",
+ "\n",
+ " response = claude.messages.create(\n",
+ " model=MODEL_CLAUDE,\n",
+ " max_tokens=200,\n",
+ " temperature=0.7,\n",
+ " system=system_prompt_with_url_data,\n",
+ " messages=messages,\n",
+ " )\n",
+ " final_response_content = response.content[0].text\n",
+ "\n",
+ " elif target_model == \"Gemini\":\n",
+ " messages_gemini = []\n",
+ " for m in history:\n",
+ " messages_gemini.append({\"role\": \"user\", \"parts\": [{\"text\": m[0]}]})\n",
+ " if m[1]:\n",
+ " messages_gemini.append({\"role\": \"model\", \"parts\": [{\"text\": m[1]}]})\n",
+ " \n",
+ " model = google.generativeai.GenerativeModel(\n",
+ " model_name=MODEL_GEMINI,\n",
+ " system_instruction=system_message,\n",
+ " tools=tools_gemini\n",
+ " )\n",
+ " \n",
+ " chat = model.start_chat(history=messages_gemini[:-1])\n",
+ " response = chat.send_message(messages_gemini[-1])\n",
+ "\n",
+ " # Check if the response is a tool call before trying to extract text\n",
+ " if response.candidates[0].content.parts[0].function_call:\n",
+ " tool_call = response.candidates[0].content.parts[0].function_call\n",
+ " response_tool = handle_tool_call(tool_call, user_message)\n",
+ "\n",
+ " tool_response_content = json.loads(response_tool[\"content\"])\n",
+ " tool_response_gemini = {\n",
+ " \"role\": \"tool\",\n",
+ " \"parts\": [{\n",
+ " \"function_response\": {\n",
+ " \"name\": tool_call.name,\n",
+ " \"response\": tool_response_content\n",
+ " }\n",
+ " }]\n",
+ " }\n",
+ " \n",
+ " # Send the tool output back and get a new response\n",
+ " response = chat.send_message(tool_response_gemini)\n",
+ " final_response_content = response.text\n",
+ " else:\n",
+ " # If the original response was not a tool call, get the text directly\n",
+ " final_response_content = response.text\n",
+ "\n",
+ " elif target_model == \"Ollama\":\n",
+ " messages_ollama = [{\"role\": \"system\", \"content\": system_message}] + messages\n",
+ " response = ollama.chat(\n",
+ " model=MODEL_LLAMA,\n",
+ " messages=messages_ollama,\n",
+ " stream=False,\n",
+ " tools=tools_gpt_ollama,\n",
+ " )\n",
+ "\n",
+ " if 'tool_calls' in response['message'] and response['message']['tool_calls']:\n",
+ " response_tool = handle_tool_call(response['message']['tool_calls'][0], user_message)\n",
+ " messages_ollama.append({\"role\": \"assistant\", \"content\": response['message']['content'], \"tool_calls\": response['message']['tool_calls']})\n",
+ " messages_ollama.append(response_tool)\n",
+ " \n",
+ " response = ollama.chat(\n",
+ " model=MODEL_LLAMA,\n",
+ " messages=messages_ollama,\n",
+ " stream=False,\n",
+ " )\n",
+ " final_response_content = response['message']['content']\n",
+ " \n",
+ " else: # Assuming GPT is default\n",
+ " messages_gpt = [{\"role\": \"system\", \"content\": system_message}] + messages\n",
+ " response_stream = openai.chat.completions.create(model=MODEL_GPT, messages=messages_gpt, stream=True, tools=tools_gpt_ollama)\n",
+ " final_response_content = \"\"\n",
+ " for chunk in response_stream:\n",
+ " content = chunk.choices[0].delta.content or \"\"\n",
+ " tool_calls_chunk = chunk.choices[0].delta.tool_calls\n",
+ " if content:\n",
+ " final_response_content += content\n",
+ " \n",
+ " if tool_calls_chunk:\n",
+ " tool_call = tool_calls_chunk[0]\n",
+ " response_tool = handle_tool_call(tool_call, user_message)\n",
+ " \n",
+ " messages_gpt.append({\"role\": \"assistant\", \"tool_calls\": [tool_call]})\n",
+ " messages_gpt.append(response_tool)\n",
+ " \n",
+ " response_stream_after_tool = openai.chat.completions.create(model=MODEL_GPT, messages=messages_gpt, stream=True)\n",
+ " for chunk_after_tool in response_stream_after_tool:\n",
+ " final_response_content += chunk_after_tool.choices[0].delta.content or \"\"\n",
+ " break\n",
+ "\n",
+ " history[-1][1] = final_response_content\n",
+ " \n",
+ " if use_audio_output != \"No\":\n",
+ " talker(final_response_content)\n",
+ "\n",
+ " return history\n",
+ "\n",
+ " def transcribe_and_chat(audio_file, history, target_model, use_audio_output):\n",
+ " if audio_file:\n",
+ " transcribed_text = transcribe_audio(audio_file)\n",
+ " new_history = history + [[transcribed_text, None]]\n",
+ " return chat_with_assistant(new_history, target_model, use_audio_output)\n",
+ " else:\n",
+ " return history\n",
+ "\n",
+ " entry.submit(\n",
+ " user_message_updater,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[entry, chatbot],\n",
+ " queue=False\n",
+ " ).then(\n",
+ " chat_with_assistant,\n",
+ " inputs=[chatbot, model_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot]\n",
+ " )\n",
+ "\n",
+ " submit_btn.click(\n",
+ " user_message_updater,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[entry, chatbot],\n",
+ " queue=False\n",
+ " ).then(\n",
+ " chat_with_assistant,\n",
+ " inputs=[chatbot, model_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot]\n",
+ " )\n",
+ "\n",
+ " audio_input.stop(\n",
+ " transcribe_and_chat,\n",
+ " inputs=[audio_input, chatbot, model_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot],\n",
+ " queue=False\n",
+ " )\n",
+ "\n",
+ " submit_audio_btn.click(\n",
+ " transcribe_and_chat,\n",
+ " inputs=[audio_input, chatbot, model_dropdown, audio_dropdown],\n",
+ " outputs=[chatbot],\n",
+ " queue=False\n",
+ " )\n",
+ " \n",
+ " clear.click(lambda: None, inputs=None, outputs=[chatbot], queue=False)\n",
+ "\n",
+ "ui.launch(inbrowser=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eb23b6cb-27af-43d6-8234-fe8295e7fe57",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/technical-question-answerer-with-gradio-v3.ipynb b/week2/community-contributions/technical-question-answerer-with-gradio-v3.ipynb
new file mode 100644
index 0000000..f6b4146
--- /dev/null
+++ b/week2/community-contributions/technical-question-answerer-with-gradio-v3.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "2b57204f-3e19-4d11-8901-c0e153ad9992",
+ "metadata": {},
+ "source": [
+ "## Technical Question Answerer With Gradio\n",
+ "- Ask a technical question to a chatbot imbued with multimodal capabilities.\n",
+ "- Choose between different models (e.g. OpenAI's GPT, Anthropic's Claude)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bd8e9bef-87ab-46d6-9393-bb308d7e5bc4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "from typing import List\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import google.generativeai\n",
+ "import anthropic\n",
+ "\n",
+ "import gradio as gr\n",
+ "import base64\n",
+ "from io import BytesIO\n",
+ "from PIL import Image\n",
+ "from IPython.display import Audio, display\n",
+ "\n",
+ "# Load environment variables in a file called .env\n",
+ "# Print the key prefixes to help with any debugging\n",
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+ "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+ "\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "if anthropic_api_key:\n",
+ " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+ "else:\n",
+ " print(\"Anthropic API Key not set\")\n",
+ "\n",
+ "if google_api_key:\n",
+ " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"Google API Key not set\")\n",
+ "\n",
+ "# Connect to OpenAI, Anthropic and Google; comment out the Claude or Google lines if you're not using them\n",
+ "openai = OpenAI()\n",
+ "claude = anthropic.Anthropic()\n",
+ "# google.generativeai.configure()\n",
+ "\n",
+ "system_message = \"You are a helpful assistant that explains technical contents and responds in markdown\"\n",
+ "\n",
+ "def talker(message):\n",
+ " response = openai.audio.speech.create(\n",
+ " model=\"tts-1\",\n",
+ " voice=\"onyx\",\n",
+ " input=message)\n",
+ "\n",
+ " audio_stream = BytesIO(response.content)\n",
+ " output_filename = \"output_audio.mp3\"\n",
+ " with open(output_filename, \"wb\") as f:\n",
+ " f.write(audio_stream.read())\n",
+ "\n",
+ " display(Audio(output_filename, autoplay=True))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20486a61-5d59-4370-b92c-3b7fec63835c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# --- Chat functions ---\n",
+ "def chat_gpt(history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+ " response = openai.chat.completions.create(\n",
+ " model=\"gpt-4o-mini\",\n",
+ " messages=messages\n",
+ " )\n",
+ " reply = response.choices[0].message.content\n",
+ " history = history + [{\"role\": \"assistant\", \"content\": reply}]\n",
+ " talker(reply) # make it talk\n",
+ " return history\n",
+ "\n",
+ "\n",
+ "claude_via_openai_client = OpenAI(\n",
+ " api_key=anthropic_api_key, \n",
+ " base_url=\"https://api.anthropic.com/v1\"\n",
+ ")\n",
+ "\n",
+ "def chat_claude(history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+ " response = claude_via_openai_client.chat.completions.create(\n",
+ " model=\"claude-3-haiku-20240307\",\n",
+ " messages=messages\n",
+ " )\n",
+ " reply = response.choices[0].message.content\n",
+ " history = history + [{\"role\": \"assistant\", \"content\": reply}]\n",
+ " talker(reply) # make it talk\n",
+ " return history\n",
+ "\n",
+ "\n",
+ "# --- Gradio UI ---\n",
+ "with gr.Blocks() as ui:\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500, type=\"messages\")\n",
+ " with gr.Row():\n",
+ " the_model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
+ " with gr.Row():\n",
+ " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ "\n",
+ " def do_entry(message, history, model):\n",
+ " # add user turn\n",
+ " history = history + [{\"role\": \"user\", \"content\": message}]\n",
+ " # call selected model\n",
+ " if model == \"GPT\":\n",
+ " history = chat_gpt(history)\n",
+ " elif model == \"Claude\":\n",
+ " history = chat_claude(history)\n",
+ " return \"\", history\n",
+ "\n",
+ " entry.submit(\n",
+ " fn=do_entry,\n",
+ " inputs=[entry, chatbot, the_model],\n",
+ " outputs=[entry, chatbot] # only 2 outputs\n",
+ " )\n",
+ "\n",
+ " clear.click(lambda: [], None, chatbot, queue=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13974664-2965-46b9-9c56-714c70d3f835",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ui.launch(inbrowser=True)\n",
+ "\n",
+ "# prompt = \"\"\"\n",
+ "# Please explain what this code does and why:\n",
+ "# yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
+ "# \"\"\""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/weather_agent.ipynb b/week2/community-contributions/weather_agent.ipynb
new file mode 100644
index 0000000..f89978a
--- /dev/null
+++ b/week2/community-contributions/weather_agent.ipynb
@@ -0,0 +1,370 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "60761989",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import datetime\n",
+ "import requests\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr\n",
+ "import speech_recognition as sr\n",
+ "import json\n",
+ "from dotenv import load_dotenv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "e0b6610a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "OpenAI API Key exists and begins sk-proj-\n",
+ "weather API Key exists\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Initialization\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "weather_api_key = os.getenv('WEATHER_API_KEY')\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ "if weather_api_key:\n",
+ " print(\"weather API Key exists\")\n",
+ "else:\n",
+ " print(\"weather API Key not set\")\n",
+ " \n",
+ "MODEL = \"gpt-4o-mini\"\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "af9d2faf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"You are a helpful assistant for weather. \"\n",
+ "system_message += \"You need to fetch the current, historical and forecast the weather data using weather api and provide the response\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "2c5208d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_current_weather(location):\n",
+ " url = f\"http://api.weatherapi.com/v1/current.json?key={weather_api_key}&q={location}&aqi=yes\"\n",
+ " return requests.get(url).json()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8e6a12e5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_forecast_weather(location, days=3):\n",
+ " url = f\"http://api.weatherapi.com/v1/forecast.json?key={weather_api_key}&q={location}&days={days}&aqi=yes&alerts=yes\"\n",
+ " return requests.get(url).json()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "eafc468e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_historical_weather(location, date):\n",
+ " url = f\"http://api.weatherapi.com/v1/history.json?key={weather_api_key}&q={location}&dt={date}&aqi=yes\"\n",
+ " return requests.get(url).json()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "2851ed55",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Weather function used as a tool by OpenAI\n",
+ "def get_weatherapi_data(location, mode=\"current\", date=None, forecast_days=3):\n",
+ " if mode == \"current\":\n",
+ " return fetch_current_weather(location)\n",
+ " elif mode == \"forecast\":\n",
+ " return fetch_forecast_weather(location, days=forecast_days)\n",
+ " elif mode == \"historical\":\n",
+ " if not date:\n",
+ " # Default: yesterday\n",
+ " date = (datetime.date.today() - datetime.timedelta(days=1)).strftime(\"%Y-%m-%d\")\n",
+ " return fetch_historical_weather(location, date)\n",
+ " else:\n",
+ " return {\"error\": \"Unknown mode.\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "368176c2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tool schema for OpenAI tool-calling\n",
+ "weatherapi_tool_schema = [\n",
+ " {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_weatherapi_data\",\n",
+ " \"description\": \"Fetches current, forecast, or historical weather data from WeatherAPI.com for a given location.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"location\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Name of the city, region, or coordinates.\"\n",
+ " },\n",
+ " \"mode\": {\n",
+ " \"type\": \"string\",\n",
+ " \"enum\": [\"current\", \"forecast\", \"historical\"],\n",
+ " \"description\": \"Type of weather data required.\"\n",
+ " },\n",
+ " \"date\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"Date for historical data in YYYY-MM-DD format. Only needed if mode is 'historical'.\"\n",
+ " },\n",
+ " \"forecast_days\": {\n",
+ " \"type\": \"integer\",\n",
+ " \"description\": \"Number of forecast days (1-10). Only needed if mode is 'forecast'.\"\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"location\", \"mode\"]\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "bd9c4d38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def audio_to_text(audio_filepath):\n",
+ " if audio_filepath is None or audio_filepath == \"\":\n",
+ " return \"\"\n",
+ " recognizer = sr.Recognizer()\n",
+ " try:\n",
+ " with sr.AudioFile(audio_filepath) as source:\n",
+ " audio = recognizer.record(source)\n",
+ " try:\n",
+ " transcript = recognizer.recognize_google(audio)\n",
+ " return transcript\n",
+ " except sr.UnknownValueError:\n",
+ " return \"\"\n",
+ " except sr.RequestError as e:\n",
+ " return f\"Speech recognition service error: {e}\"\n",
+ " except Exception as e:\n",
+ " return f\"Error opening audio file: {str(e)}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "61c5de82",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat_agent(city, mode, date, forecast_days, audio=None):\n",
+ " user_query = city\n",
+ " if audio:\n",
+ " spoken_text = audio_to_text(audio)\n",
+ " print(\"Recognized speech:\", spoken_text)\n",
+ " if spoken_text and spoken_text.strip().lower() != \"flic en flac\":\n",
+ " user_query = spoken_text\n",
+ " else:\n",
+ " if not city.strip():\n",
+ " return \"Sorry, I could not recognize your speech. Please try again or type your city.\"\n",
+ "\n",
+ " if not user_query.strip():\n",
+ " return \"Please provide a location by text or speech.\"\n",
+ "\n",
+ " # Compose tool function arguments as the LLM would\n",
+ " args = {\n",
+ " \"location\": user_query,\n",
+ " \"mode\": mode\n",
+ " }\n",
+ " if mode == \"historical\" and date:\n",
+ " args[\"date\"] = date\n",
+ " if mode == \"forecast\":\n",
+ " try:\n",
+ " n_days = int(forecast_days)\n",
+ " except:\n",
+ " n_days = 3\n",
+ " args[\"forecast_days\"] = n_days\n",
+ "\n",
+ " openai.api_key = openai_api_key\n",
+ "\n",
+ " # LLM call for tool use\n",
+ " response = openai.chat.completions.create(\n",
+ " model=\"gpt-4-0613\",\n",
+ " messages=[{\"role\": \"user\", \"content\": f\"Get me {mode} weather for {user_query}\"+(f' on {date}' if date and mode==\"historical\" else \"\")+(f' for {forecast_days} days' if forecast_days and mode==\"forecast\" else \"\")}],\n",
+ " tools=weatherapi_tool_schema,\n",
+ "        tool_choice={\"type\": \"function\", \"function\": {\"name\": \"get_weatherapi_data\"}}\n",
+ " )\n",
+ " message = response.choices[0].message\n",
+ "\n",
+ " if hasattr(message, \"tool_calls\") and message.tool_calls:\n",
+ " tool_call = message.tool_calls[0]\n",
+ " args2 = json.loads(tool_call.function.arguments) # not really needed, already have args\n",
+ " location = args2.get(\"location\", user_query)\n",
+ " mode = args2.get(\"mode\", mode)\n",
+ " date = args2.get(\"date\", date)\n",
+ " forecast_days = args2.get(\"forecast_days\", forecast_days)\n",
+ " weather_data = get_weatherapi_data(location, mode, date, forecast_days)\n",
+ " tool_result = f\"Weather data (mode={mode}) for {location}:\\n{json.dumps(weather_data, indent=2)[:3000]}\"\n",
+ " followup = openai.chat.completions.create(\n",
+ " model=\"gpt-4-0613\",\n",
+ " messages=[\n",
+ " {\"role\": \"user\", \"content\": f\"Get me {mode} weather for {location}\"},\n",
+ " message,\n",
+ " {\n",
+ " \"role\": \"tool\",\n",
+ " \"tool_call_id\": tool_call.id,\n",
+ " \"content\": tool_result\n",
+ " }\n",
+ " ]\n",
+ " )\n",
+ " answer = followup.choices[0].message.content.strip()\n",
+ " return answer\n",
+ " else:\n",
+ " return getattr(message, \"content\", \"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "44071389",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def update_date_visibility(mode):\n",
+ " return gr.update(visible=(mode==\"historical\"))\n",
+ "\n",
+ "def update_days_visibility(mode):\n",
+ " return gr.update(visible=(mode==\"forecast\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "618a5494",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7861\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Recognized speech: Error opening audio file: FLAC conversion utility not available - consider installing the FLAC command line application by running `apt-get install flac` or your operating system's equivalent\n"
+ ]
+ }
+ ],
+ "source": [
+ "with gr.Blocks() as demo:\n",
+ " gr.Markdown(\"## Weather Chat Agent (Current, Historical, Forecast)\")\n",
+ "\n",
+ " with gr.Row():\n",
+ " city_input = gr.Textbox(label=\"City/Location\")\n",
+ " mode_input = gr.Dropdown(\n",
+ " [\"current\", \"historical\", \"forecast\"],\n",
+ " value=\"current\",\n",
+ " label=\"Weather Mode\")\n",
+ " with gr.Row():\n",
+ " date_input = gr.Textbox(label=\"Date for historical (YYYY-MM-DD)\", visible=False)\n",
+ " days_input = gr.Textbox(label=\"Forecast Days (for forecast)\", value=\"3\", visible=False)\n",
+ " audio_input = gr.Audio(type=\"filepath\", format=\"wav\", label=\"Or Speak your City/Location (optional)\")\n",
+ " output_box = gr.Textbox(label=\"Weather Info\", lines=8)\n",
+ " btn = gr.Button(\"Get Weather\")\n",
+ "\n",
+ " # Show/hide date and days inputs based on dropdown\n",
+ " mode_input.change(update_date_visibility, mode_input, date_input)\n",
+ " mode_input.change(update_days_visibility, mode_input, days_input)\n",
+ " btn.click(\n",
+ " chat_agent,\n",
+ " [city_input, mode_input, date_input, days_input, audio_input],\n",
+ " output_box\n",
+ " )\n",
+ "\n",
+ "demo.launch()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llms",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/week2_exercise_by_abrar.ipynb b/week2/community-contributions/week2_exercise_by_abrar.ipynb
new file mode 100644
index 0000000..3141217
--- /dev/null
+++ b/week2/community-contributions/week2_exercise_by_abrar.ipynb
@@ -0,0 +1,490 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ddfa9ae6-69fe-444a-b994-8c4c5970a7ec",
+ "metadata": {},
+ "source": [
+    "# Project - Cricket Analyst AI Assistant\n",
+ "\n",
+ "Cricket Analyst AI Assistant is an intelligent tool that analyzes cricket data to compare players, evaluate performances across formats, and provide insightful statistics. It processes historical and recent match data to deliver easy-to-understand summaries, helping fans, analysts, and coaches make informed decisions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import json\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import gradio as gr\n",
+ "import speech_recognition as sr\n",
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialization\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "if openai_api_key:\n",
+ " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+ "else:\n",
+ " print(\"OpenAI API Key not set\")\n",
+ " \n",
+ "MODEL = \"gpt-4o-mini\"\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a521d84-d07c-49ab-a0df-d6451499ed97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = \"\"\"\n",
+ "You are a Cricket Analyst AI with deep knowledge of cricket statistics and match analysis.\n",
+ "When comparing players, call the `analyze_cricket` tool to get factual data before answering.\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d7be34a6-7288-43b0-ad4e-bbed836cb786",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sample cricket stats as a list of dicts\n",
+ "cricket_data = [\n",
+ " {\"Player\": \"Virat Kohli\", \"Format\": \"ODI\", \"Year\": 2023, \"Runs\": 1377, \"Matches\": 27, \"Average\": 57.37, \"StrikeRate\": 93.21},\n",
+ " {\"Player\": \"Virat Kohli\", \"Format\": \"ODI\", \"Year\": 2022, \"Runs\": 765, \"Matches\": 20, \"Average\": 42.50, \"StrikeRate\": 88.40},\n",
+ " {\"Player\": \"Virat Kohli\", \"Format\": \"ODI\", \"Year\": 2021, \"Runs\": 560, \"Matches\": 15, \"Average\": 40.00, \"StrikeRate\": 90.10},\n",
+ " {\"Player\": \"Babar Azam\", \"Format\": \"ODI\", \"Year\": 2023, \"Runs\": 1454, \"Matches\": 26, \"Average\": 62.00, \"StrikeRate\": 89.50},\n",
+ " {\"Player\": \"Babar Azam\", \"Format\": \"ODI\", \"Year\": 2022, \"Runs\": 1198, \"Matches\": 18, \"Average\": 66.55, \"StrikeRate\": 92.00},\n",
+ " {\"Player\": \"Babar Azam\", \"Format\": \"ODI\", \"Year\": 2021, \"Runs\": 949, \"Matches\": 15, \"Average\": 67.78, \"StrikeRate\": 90.50},\n",
+ " {\"Player\": \"Joe Root\", \"Format\": \"Test\", \"Year\": 2025, \"Runs\": 949, \"Matches\": 15, \"Average\": 69.78, \"StrikeRate\": 95.50},\n",
+ " {\"Player\": \"Joe Root\", \"Format\": \"Test\", \"Year\": 2024, \"Runs\": 2025, \"Matches\": 22, \"Average\": 68.78, \"StrikeRate\": 90.50},\n",
+ " {\"Player\": \"Harry Brook\", \"Format\": \"Test\", \"Year\": 2025, \"Runs\": 1056, \"Matches\": 16, \"Average\": 67.78, \"StrikeRate\": 95.50},\n",
+ " {\"Player\": \"Harry Brook\", \"Format\": \"Test\", \"Year\": 2024, \"Runs\": 2200, \"Matches\": 21, \"Average\": 71.78, \"StrikeRate\": 98.50},\n",
+ "\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36bedabf-a0a7-4985-ad8e-07ed6a55a3a4",
+ "metadata": {},
+ "source": [
+ "## Tools\n",
+ "\n",
+    "Tools start from here. \n",
+    "For this notebook, I have written just one Tool; you can add multiple tools for your agent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "46a0a260-b11b-4bde-ab80-911a81e2c281",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def analyze_cricket(data_list, player1, match_format=\"ODI\", years=3):\n",
+ " \"\"\"\n",
+ " Return cricket players' performances using an in-memory list of dicts.\n",
+ " \"\"\"\n",
+ " print(\"Tool 'analyze_cricket' is called\")\n",
+ " df = pd.DataFrame(data_list)\n",
+ " latest_year = df['Year'].max()\n",
+ " min_year = latest_year - years + 1\n",
+ "\n",
+ " filtered = df[\n",
+ " (df['Format'].str.upper() == match_format.upper()) &\n",
+ " (df['Year'] >= min_year) &\n",
+ " (df['Player'].isin([player1]))\n",
+ " ]\n",
+ " if filtered.empty:\n",
+ " return {\"error\": f\"No data found for {player1} in {match_format} for last {years} years.\"}\n",
+ "\n",
+ " summary = filtered.groupby(\"Player\").agg({\n",
+ " \"Matches\": \"sum\",\n",
+ " \"Runs\": \"sum\",\n",
+ " \"Average\": \"mean\",\n",
+ " \"StrikeRate\": \"mean\"\n",
+ " }).round(2)\n",
+ "\n",
+ " return summary.reset_index().to_dict(orient=\"records\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cbfd413e-b5d4-42bd-b86f-ed9b4ee360eb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Example usage:\n",
+ "result = analyze_cricket(cricket_data, \"Virat Kohli\", \"ODI\", 3)\n",
+ "print(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c92699c1-802b-4948-a654-df89e0c19adb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tool definition \n",
+ "analyze_cricket_functions = {\n",
+ " \"name\": \"analyze_cricket\",\n",
+    "    \"description\": \"Summarize a cricket player's performance over the last N years.\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"player1\": {\"type\": \"string\", \"description\": \"Name of first player\"},\n",
+ " # \"player2\": {\"type\": \"string\", \"description\": \"Name of second player\"},\n",
+ " \"match_format\": {\"type\": \"string\", \"enum\": [\"ODI\", \"Test\", \"T20\"], \"description\": \"Format of the match\"},\n",
+ " \"years\": {\"type\": \"integer\", \"description\": \"Number of years to compare\"}\n",
+ " },\n",
+ " \"required\": [\"player1\"]\n",
+ " }\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And this is included in a list of tools:\n",
+ "\n",
+ "tools = [{\"type\": \"function\", \"function\": analyze_cricket_functions}]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3d3554f-b4e3-4ce7-af6f-68faa6dd2340",
+ "metadata": {},
+ "source": [
+ "## Getting OpenAI to use our Tool\n",
+ "\n",
+ "There's some fiddly stuff to allow OpenAI \"to call our tool\"\n",
+ "\n",
+ "What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.\n",
+ "\n",
+ "Here's how the new chat function looks:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce9b0744-9c78-408d-b9df-9f6fd9ed78cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(message, history):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
+ "\n",
+ " if response.choices[0].finish_reason==\"tool_calls\":\n",
+ " message = response.choices[0].message\n",
+ " messages.append(message)\n",
+ " for tool_call in message.tool_calls: \n",
+ " response, player1= handle_tool_call(tool_call)\n",
+ " messages.append(response)\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " return response.choices[0].message.content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0992986-ea09-4912-a076-8e5603ee631f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We have to write that function handle_tool_call:\n",
+ "\n",
+ "def handle_tool_call(tool_call):\n",
+ " # tool_call = message.tool_calls[0]\n",
+ " # print(\"tool_call.id\", tool_call.id)\n",
+ " arguments = json.loads(tool_call.function.arguments)\n",
+ " print(\"arguments\", arguments)\n",
+ " player1 = arguments.get('player1')\n",
+ " # player2 = arguments.get('player2')\n",
+ " match_format = arguments.get('match_format', 'ODI')\n",
+ " years = arguments.get('years', 3)\n",
+ " result = analyze_cricket(cricket_data, player1, match_format, years)\n",
+ " print(\"result from analyze_cricket function: \", tool_call.id, result)\n",
+ " response = {\n",
+ " \"role\": \"tool\",\n",
+ " \"content\": json.dumps(result),\n",
+ " \"tool_call_id\": tool_call.id\n",
+ " }\n",
+ " return response, player1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "924e7225-b76d-4518-abad-5bea5c356cf8",
+ "metadata": {},
+ "source": [
+ "# Sample User prompt\n",
+ "\n",
+ "1. ### Compare Babar and Virat in ODI matches over the last 3 years.\n",
+ "Here \n",
+ "Player1 is Babar\n",
+ "Player2 is Virat\n",
+ "match_format is ODI\n",
+ "years is 3\n",
+ "\n",
+ "\n",
+ "2. ### can you please give me the comparison of Virat and babar?\n",
+ "Here, you are not provided the info on the format and number of years. In this case, the function will pick the default values for the match format, which is ODI, and the years, which is 3.\n",
+ "\n",
+ "\n",
+ "3. ### Compare Rizwan and Babar in ODI Matches over the last years.\n",
+ "The given data is not available in the above data list. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f4be8a71-b19e-4c2f-80df-f59ff2661f14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gr.ChatInterface(fn=chat, type=\"messages\").launch(inbrowser=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "473e5b39-da8f-4db1-83ae-dbaca2e9531e",
+ "metadata": {},
+ "source": [
+ "# Let's go multi-modal!!\n",
+ "\n",
+ "We can use DALL-E-3, the image generation model behind GPT-4o, to make us some images\n",
+ "\n",
+ "Let's put this in a function called artist.\n",
+ "\n",
+ "### Price alert: each time I generate an image it costs about 4 cents - don't go crazy with images!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c27c4ba-8ed5-492f-add1-02ce9c81d34c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Some imports for handling images\n",
+ "\n",
+ "import base64\n",
+ "from io import BytesIO\n",
+ "from PIL import Image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "773a9f11-557e-43c9-ad50-56cbec3a0f8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def artist(player_names):\n",
+ " if len(player_names) <2 or len(player_names) > 2:\n",
+ " return None\n",
+ " player1 = player_names[0]\n",
+ " player2 = player_names[1]\n",
+ " image_response = openai.images.generate(\n",
+ " model=\"dall-e-3\",\n",
+ " prompt=f\"An image representing a comparison of {player1} and {player2}, showing their country flags and bowling or batting style\",\n",
+ " size=\"1024x1024\",\n",
+ " n=1,\n",
+ " response_format=\"b64_json\",\n",
+ " )\n",
+ " image_base64 = image_response.data[0].b64_json\n",
+ " image_data = base64.b64decode(image_base64)\n",
+ " return Image.open(BytesIO(image_data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d877c453-e7fb-482a-88aa-1a03f976b9e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "image = artist([\"Babar\", \"root\"])\n",
+ "display(image)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ffbfe93b-5e86-4e68-ba71-b301cd5230db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pydub import AudioSegment\n",
+ "from pydub.playback import play\n",
+ "\n",
+ "def talker(message):\n",
+ " response = openai.audio.speech.create(\n",
+ " model=\"tts-1\",\n",
+ " voice=\"onyx\", # Also, try replacing onyx with alloy\n",
+ " input=message\n",
+ " )\n",
+ " \n",
+ " audio_stream = BytesIO(response.content)\n",
+ " audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
+ " play(audio)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b88d775d-d357-4292-a1ad-5dc5ed567281",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "talker(\"Well, hi there\") # For testing purposes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d48876d-c4fa-46a8-a04f-f9fadf61fb0d",
+ "metadata": {},
+ "source": [
+ "# Our Agent Framework\n",
+ "\n",
+ "The term 'Agentic AI' and Agentization is an umbrella term that refers to a number of techniques, such as:\n",
+ "\n",
+ "1. Breaking a complex problem into smaller steps, with multiple LLMs carrying out specialized tasks\n",
+ "2. The ability for LLMs to use Tools to give them additional capabilities\n",
+ "3. The 'Agent Environment' which allows Agents to collaborate\n",
+ "4. An LLM can act as the Planner, dividing bigger tasks into smaller ones for the specialists\n",
+ "5. The concept of an Agent having autonomy / agency, beyond just responding to a prompt - such as Memory\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba820c95-02f5-499e-8f3c-8727ee0a6c0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(history, image_choice):\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
+ " image = None\n",
+ " \n",
+ " if response.choices[0].finish_reason==\"tool_calls\":\n",
+ " message = response.choices[0].message\n",
+ " messages.append(message)\n",
+ " player_names = []\n",
+ " for tool_call in message.tool_calls:\n",
+ " response, player1= handle_tool_call(tool_call)\n",
+ " player_names.append(player1)\n",
+ " messages.append(response)\n",
+ " if image_choice.lower() == 'yes':\n",
+ " image = artist(player_names)\n",
+ " else:\n",
+ " print(\"Image value is NO\", image_choice)\n",
+ " \n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " \n",
+ " reply = response.choices[0].message.content\n",
+ " history += [{\"role\":\"assistant\", \"content\":reply}]\n",
+ "\n",
+ " # Comment out or delete the next line if you'd rather skip Audio for now..\n",
+ " talker(reply)\n",
+ " \n",
+ " return history, image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f38d0d27-33bf-4992-a2e5-5dbed973cde7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# More involved Gradio code as we're not using the preset Chat interface!\n",
+ "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
+ "\n",
+ "with gr.Blocks() as ui:\n",
+    "    gr.Markdown(\"### 🏏 Cricket Analyst AI Assistant\")\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500, type=\"messages\")\n",
+ " image_output = gr.Image(height=500)\n",
+ "\n",
+ " with gr.Row():\n",
+ " image_dropdown = gr.Dropdown(\n",
+ " choices=[\"Yes\", \"No\"],\n",
+ " label=\"Do you want image?\"\n",
+ " )\n",
+ " with gr.Row():\n",
+ " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ "\n",
+ " def do_entry(message, history, image_choice):\n",
+ " history += [{\"role\": \"user\", \"content\": message}]\n",
+ " return \"\", history, image_choice\n",
+ "\n",
+ " entry.submit(\n",
+ " do_entry, \n",
+ " inputs=[entry, chatbot, image_dropdown], \n",
+ " outputs=[entry, chatbot, image_dropdown]\n",
+ " ).then(\n",
+ " chat, \n",
+ " inputs=[chatbot, image_dropdown], \n",
+ " outputs=[chatbot, image_output]\n",
+ " )\n",
+ " clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
+ "\n",
+ "ui.launch(inbrowser=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week2/community-contributions/week2_tennis.ipynb b/week2/community-contributions/week2_tennis.ipynb
new file mode 100644
index 0000000..bed232f
--- /dev/null
+++ b/week2/community-contributions/week2_tennis.ipynb
@@ -0,0 +1,331 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ad6e5ed4-a38d-46a6-8bb5-32d68bd0b9e5",
+ "metadata": {},
+ "source": [
+ "End of week 2 exercise"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "8f45fbfa-eaaa-4eb8-841e-83b068b80507",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import json\n",
+ "import gradio as gr\n",
+ "import base64\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "from io import BytesIO\n",
+ "from PIL import Image\n",
+ "from pydub import AudioSegment\n",
+ "from pydub.playback import play\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "451b72a6-1e6c-476a-8431-1c30c5cd9fb8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "API key found and looks good so far!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Initialization\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+ "if openai_api_key:\n",
+ " print(\"API key found and looks good so far!\")\n",
+ "else:\n",
+ " print(\"No API key was found!\")\n",
+ "\n",
+ "MODEL = \"gpt-4o-mini\"\n",
+ "openai = OpenAI()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "29fa6a53-4b57-47ea-89a1-640020e603b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "system_message = (\n",
+ " \"You are a helpful tennis coach who answers questions about tennis rules, \"\n",
+ " \"players, strategies, training, and equipment.\"\n",
+    "    \"Give short, courteous answers, no more than 2 sentences.\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "e9b255e7-02d8-4350-b5d4-e645d1fc90d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Translation\n",
+ "\n",
+ "LANG_CODES = {\n",
+ " \"English\": \"en\",\n",
+ " \"Spanish\": \"es\",\n",
+ " \"French\": \"fr\"\n",
+ "}\n",
+ "\n",
+ "def translate_text(text, target_language=\"en\"):\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": f\"You are a translator. Translate the following text to {target_language}\"},\n",
+ " {\"role\": \"user\", \"content\": text}\n",
+ " ]\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " return response.choices[0].message.content\n",
+ "\n",
+    "def tennis_info_tool(query):\n",
+    "    if \"top\" in query.lower():\n",
+    "        return \"Top male players: Djokovic, Nadal, Federer. Top female players: Barty, Sabalenka, Swiatek.\"\n",
+    "    return \"I don't have that information to hand - try asking about the top players.\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "b44b147c-bfba-4137-9ecb-d5538f08a46d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Image\n",
+ "def generate_tennis_image(prompt):\n",
+ " image_response = openai.images.generate(\n",
+ " model=\"dall-e-3\",\n",
+ " prompt=f\"Tennis scene: {prompt}, realistic and detailed, vibrant colors\",\n",
+ " size=\"1024x1024\",\n",
+ " n=1,\n",
+ " response_format=\"b64_json\",\n",
+ " )\n",
+ " image_base64 = image_response.data[0].b64_json\n",
+ " image_data = base64.b64decode(image_base64)\n",
+ " return Image.open(BytesIO(image_data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "bbfeff3b-0c73-4b2c-a6da-3cac27d8fedd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Audio\n",
+ "\n",
+ "def talker(message):\n",
+ " response = openai.audio.speech.create(\n",
+ " model=\"tts-1\",\n",
+ " voice=\"onyx\",\n",
+ " input=message\n",
+ " )\n",
+ "\n",
+ " audio_stream = BytesIO(response.content)\n",
+ " audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
+ " play(audio)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa338332-3dfc-4b95-8367-65853a8d2793",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "574d672e-0a75-4af9-b3ad-8dc2dec4e607",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def chat(history, user_message, target_language=\"English\", use_audio=False, generate_image=False):\n",
+ " image = None\n",
+ "\n",
+ " if any(keyword in user_message.lower() for keyword in [\"top\", \"players\"]):\n",
+ " reply = tennis_info_tool(user_message)\n",
+ " else:\n",
+ " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+ " response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+ " reply = response.choices[0].message.content\n",
+ " \n",
+ " if target_language != \"English\":\n",
+ " code = LANG_CODES.get(target_language, \"en\")\n",
+ " reply = translate_text(reply, code)\n",
+ "\n",
+ " history.append({\"role\": \"assistant\", \"content\": reply})\n",
+ " \n",
+ " if use_audio:\n",
+ " talker(reply)\n",
+ "\n",
+ " if generate_image:\n",
+ " image = generate_tennis_image(reply)\n",
+ " return history, image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eaf4f47e-d20b-41f8-94b5-4aef0302731b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "9110343f-0efa-49bc-8d5f-498fd690dd14",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7869\n",
+ "* To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Input #0, wav, from '/var/folders/73/0s09hh2n48q7s14tld64q3rh0000gn/T/tmp4hoe_x5n.wav':\n",
+ " Duration: 00:00:06.55, bitrate: 384 kb/s\n",
+ " Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s\n",
+ " 6.45 M-A: -0.000 fd= 0 aq= 0KB vq= 0KB sq= 0B "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Input #0, wav, from '/var/folders/73/0s09hh2n48q7s14tld64q3rh0000gn/T/tmp2mxw0wth.wav':\n",
+ " Duration: 00:00:04.61, bitrate: 384 kb/s\n",
+ " Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s\n",
+ " 4.48 M-A: 0.000 fd= 0 aq= 0KB vq= 0KB sq= 0B "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Gradio\n",
+ "\n",
+ "with gr.Blocks() as ui:\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500, type=\"messages\")\n",
+ " image_output = gr.Image(height=500)\n",
+ " with gr.Row():\n",
+ " entry = gr.Textbox(label=\"Ask your Tennis Coach:\", placeholder=\"Type and press Enter...\")\n",
+ " target_lang_dropdown = gr.Dropdown(\n",
+ " choices=[\"English\", \"Spanish\", \"French\"],\n",
+ " value=\"English\",\n",
+ " label=\"Translate to:\"\n",
+ " )\n",
+ " audio_toggle = gr.Checkbox(label=\"Play audio\", value=False)\n",
+ " image_toggle = gr.Checkbox(label=\"Generate image for this reply\", value=True)\n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ " \n",
+ " def add_user_message(message, history):\n",
+ " history.append({\"role\": \"user\", \"content\": message})\n",
+ " return \"\", history\n",
+ "\n",
+ " def chat_response(history, message, target_language, use_audio, generate_image):\n",
+ " history, image = chat(history, message, target_language, use_audio, generate_image)\n",
+ " return history, image\n",
+ "\n",
+ " entry.submit(\n",
+ " add_user_message,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[entry, chatbot]).then(\n",
+ " chat_response,\n",
+ " inputs=[chatbot, entry, target_lang_dropdown, audio_toggle, image_toggle],\n",
+ " outputs=[chatbot, image_output]\n",
+ " )\n",
+ "\n",
+ " clear.click(lambda: (None, None, None), inputs=None, outputs=[chatbot, image_output, entry], queue=False)\n",
+ "\n",
+ "ui.launch(inbrowser=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0359c29-22aa-4156-9afa-8c63c02ca747",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week3/community-contributions/AI_Minute_Taker.ipynb b/week3/community-contributions/AI_Minute_Taker.ipynb
new file mode 100644
index 0000000..d189aef
--- /dev/null
+++ b/week3/community-contributions/AI_Minute_Taker.ipynb
@@ -0,0 +1,186 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Create meeting minutes from an Audio file\n",
+ "For this project, the UI allows you to either upload meeting minutes, or record something of your own!"
+ ],
+ "metadata": {
+ "id": "MYOLn_FzYAF4"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# --- Install deps ---\n",
+ "!pip install -q gradio torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n",
+ "!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai"
+ ],
+ "metadata": {
+ "id": "M01YO75ITfXF"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# --- Imports ---\n",
+ "import gradio as gr\n",
+ "import torch\n",
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
+ "from openai import OpenAI\n",
+ "from huggingface_hub import login\n",
+ "from google.colab import userdata\n",
+ "from google.colab import drive\n",
+ "import os"
+ ],
+ "metadata": {
+ "id": "DGE8_oAwZJBo"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# --- Constants ---\n",
+ "AUDIO_MODEL = \"whisper-1\"\n",
+ "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
+ ],
+ "metadata": {
+ "id": "JPu-aNxDTmDi"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# --- Auth ---\n",
+ "# assumes Colab userdata or your own env vars\n",
+ "hf_token = userdata.get('HF_TOKEN')\n",
+ "login(hf_token, add_to_git_credential=True)"
+ ],
+ "metadata": {
+ "id": "JfWUrEVJTmET"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "openai_api_key = userdata.get('OPENAI_API_KEY')\n",
+ "openai = OpenAI(api_key=openai_api_key)"
+ ],
+ "metadata": {
+ "id": "AiUtJ0mjTpVE"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# --- Model setup ---\n",
+ "quant_config = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ " bnb_4bit_quant_type=\"nf4\"\n",
+ ")\n",
+ "\n",
+ "tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
+ "tokenizer.pad_token = tokenizer.eos_token\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ " LLAMA, device_map=\"auto\", quantization_config=quant_config\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "hMb4dggMW2s5"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XTEW5qAwRN4Y"
+ },
+ "outputs": [],
+ "source": [
+ "# --- Processing function ---\n",
+ "def process_meeting(audio_file):\n",
+ " # Step 1: Transcribe\n",
+ " with open(audio_file, \"rb\") as f:\n",
+ " transcription = openai.audio.transcriptions.create(\n",
+ " model=AUDIO_MODEL, file=f, response_format=\"text\"\n",
+ " )\n",
+ "\n",
+ " # Step 2: Prepare prompt\n",
+ " system_message = (\n",
+ " \"You are an assistant that produces minutes of meetings from transcripts, \"\n",
+ " \"with summary, key discussion points, takeaways and action items with owners, \"\n",
+ " \"in markdown.\"\n",
+ " )\n",
+ " user_prompt = (\n",
+ " f\"Below is an extract transcript of a meeting. Please write minutes in markdown, \"\n",
+ " f\"including a summary with attendees, location and date; discussion points; \"\n",
+ " f\"takeaways; and action items with owners.\\n{transcription}\"\n",
+ " )\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ "\n",
+ " # Step 3: Run through LLaMA\n",
+ " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
+ " streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
+ " outputs = model.generate(inputs, max_new_tokens=2000)\n",
+ "\n",
+ " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ " return response\n",
+ "\n",
+ "# --- Gradio UI ---\n",
+ "with gr.Blocks() as demo:\n",
+ " gr.Markdown(\"## ๐ Meeting Minutes Generator\\nUpload an audio file and get structured meeting minutes.\")\n",
+ " with gr.Row():\n",
+ " audio_in = gr.Audio(type=\"filepath\", label=\"Upload Meeting Audio\")\n",
+ " btn = gr.Button(\"Generate Minutes\")\n",
+ " md_out = gr.Markdown()\n",
+ "\n",
+ " btn.click(fn=process_meeting, inputs=audio_in, outputs=md_out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "demo.launch()"
+ ],
+ "metadata": {
+ "id": "Yh4-imrmY8MH"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/week3/community-contributions/rwothoromo/week3day5assignment.ipynb b/week3/community-contributions/rwothoromo/week3day5assignment.ipynb
new file mode 100644
index 0000000..a42e611
--- /dev/null
+++ b/week3/community-contributions/rwothoromo/week3day5assignment.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "18b82c6b-10dc-4d94-b8dc-592ff011ce2b",
+ "metadata": {},
+ "source": [
+ "# Meeting minutes creator\n",
+ "\n",
+ "In this colab, we make a meeting minutes program.\n",
+ "\n",
+ "It includes useful code to connect your Google Drive to your colab.\n",
+ "\n",
+ "Upload your own audio to make this work!!\n",
+ "\n",
+ "https://colab.research.google.com/drive/13wR4Blz3Ot_x0GOpflmvvFffm5XU3Kct?usp=sharing\n",
+ "\n",
+ "This should run nicely on a low-cost or free T4 box.\n",
+ "\n",
+ "## **Assignment:**\n",
+ "Put Everything into a nice Gradio UI (similar to last week)\n",
+ "Input file name of audio to process.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9289ba7-200c-43a9-b67a-c5ce826c9537",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import re, requests, json, tempfile, gradio as gr, torch, os\n",
+ "from bs4 import BeautifulSoup\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "from google.colab import drive, userdata\n",
+ "from huggingface_hub import login\n",
+ "from openai import OpenAI\n",
+ "from pydub import AudioSegment\n",
+ "from pydub.playback import play\n",
+ "from io import BytesIO\n",
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
+ "\n",
+ "# Sign in to HuggingFace Hub\n",
+ "hf_token = userdata.get('HF_TOKEN')\n",
+ "login(hf_token, add_to_git_credential=True)\n",
+ "\n",
+ "# Sign in to OpenAI using Secrets in Colab\n",
+ "openai_api_key = userdata.get('OPENAI_API_KEY')\n",
+ "\n",
+ "# Initialize client\n",
+ "try:\n",
+ " openai = OpenAI(api_key=openai_api_key)\n",
+ "except Exception as e:\n",
+ " openai = None\n",
+ " print(f\"OpenAI client not initialized: {e}\")\n",
+ "\n",
+ "# Constants\n",
+ "AUDIO_MODEL = \"whisper-1\"\n",
+ "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
+ "\n",
+ "# Google Drive\n",
+ "drive.mount(\"/content/drive\")\n",
+ "\n",
+ "# Local LLM setup (Llama 3.1)\n",
+ "try:\n",
+ " quant_config = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ " bnb_4bit_quant_type=\"nf4\"\n",
+ " )\n",
+ " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
+ "\n",
+ " # Set the pad token to the end-of-sequence token for generation\n",
+ " tokenizer.pad_token = tokenizer.eos_token\n",
+ "\n",
+ " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
+ " # model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", torch_dtype=torch.bfloat16, quantization_config=quant_config, trust_remote_code=True)\n",
+ "\n",
+ " model.eval() # Set model to evaluation mode\n",
+ "except Exception as e:\n",
+ " # If the local model fails to load, set variables to None\n",
+ " model = None\n",
+ " tokenizer = None\n",
+ " print(f\"Failed to load local model: {e}\")\n",
+ "\n",
+ "# Updated function to handle audio transcription\n",
+ "def transcribe_audio(audio_file):\n",
+ " \"\"\"\n",
+ " Transcribes an audio file to text using OpenAI's Whisper model.\n",
+ " Handles both local file paths and mounted Google Drive file paths.\n",
+ " \"\"\"\n",
+ " if not openai:\n",
+ " return \"OpenAI client not initialized. Please check your API key.\"\n",
+ "\n",
+ " if audio_file is None:\n",
+ " return \"No audio input provided.\"\n",
+ "\n",
+ " # Check if the file exists before attempting to open it\n",
+ " # Construct the expected path in Google Drive\n",
+ " # If the input is from the microphone, it will be a temporary file path\n",
+ " # If the input is from the textbox, it could be a full path or just a filename\n",
+ " if audio_file.startswith(\"/content/drive/MyDrive/llms/\"):\n",
+ " file_path_to_open = audio_file\n",
+ " else:\n",
+ " # Assume it's either a local path or just a filename in MyDrive/llms\n",
+ " # We'll prioritize checking MyDrive/llms first\n",
+ " gdrive_path_attempt = os.path.join(\"/content/drive/MyDrive/llms\", os.path.basename(audio_file))\n",
+ " if os.path.exists(gdrive_path_attempt):\n",
+ " file_path_to_open = gdrive_path_attempt\n",
+ " elif os.path.exists(audio_file):\n",
+ " file_path_to_open = audio_file\n",
+ " else:\n",
+ " return f\"File not found: {audio_file}. Please ensure the file exists in your Google Drive at /content/drive/MyDrive/llms/ or is a valid local path.\"\n",
+ "\n",
+ "\n",
+ " if not os.path.exists(file_path_to_open):\n",
+ " return f\"File not found: {file_path_to_open}. Please ensure the file exists.\"\n",
+ "\n",
+ "\n",
+ " try:\n",
+ " with open(file_path_to_open, \"rb\") as f:\n",
+ " transcription = openai.audio.transcriptions.create(\n",
+ " model=AUDIO_MODEL,\n",
+ " file=f,\n",
+ " response_format=\"text\"\n",
+ " )\n",
+ " return transcription\n",
+ " except Exception as e:\n",
+ " return f\"An error occurred during transcription: {e}\"\n",
+ "\n",
+ "def generate_minutes(transcription):\n",
+ " \"\"\"\n",
+ " Generates meeting minutes from a transcript using a local Llama model.\n",
+ " Format the input, generate a response, and return the complete text string.\n",
+ " \"\"\"\n",
+ " # Check if the local model and tokenizer were successfully loaded\n",
+ " if not model or not tokenizer:\n",
+ " return \"Local Llama model not loaded. Check model paths and hardware compatibility.\"\n",
+ "\n",
+ " system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
+ " user_prompt = f\"Below is an extract transcript of an Audio recording. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n",
+ "\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ "\n",
+ " try:\n",
+ " # Apply the chat template to format the messages for the model\n",
+ " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ " # Generate the output. max_new_tokens controls the length of the generated text.\n",
+ " outputs = model.generate(inputs, max_new_tokens=2000)\n",
+ "\n",
+ " # Decode only the new tokens generated by the model (not the input tokens) to a human-readable string\n",
+ " response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ " # The model's response will contain the full conversation.\n",
+ " # Extract only the assistant's part!\n",
+ " assistant_start = \"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n",
+ " if assistant_start in response_text:\n",
+ " response_text = response_text.split(assistant_start)[-1]\n",
+ "\n",
+ " return response_text\n",
+ "\n",
+ " except Exception as e:\n",
+ " return f\"An error occurred during local model generation: {e}\"\n",
+ "\n",
+ "# Gradio UI components\n",
+ "with gr.Blocks() as ui:\n",
+ " gr.Markdown(\"# Meeting Minutes Generator\")\n",
+ " with gr.Row():\n",
+ " chatbot = gr.Chatbot(height=500, label=\"AI Assistant\")\n",
+ " with gr.Row():\n",
+ " entry = gr.Textbox(label=\"Provide the filename or path of the audio file to transcribe:\", scale=4)\n",
+ " submit_btn = gr.Button(\"Generate Minutes\", scale=1)\n",
+ " with gr.Row():\n",
+ " audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Or speak to our AI Assistant to transcribe\", scale=4)\n",
+ " submit_audio_btn = gr.Button(\"Transcribe Audio\", scale=1)\n",
+ "\n",
+ " with gr.Row():\n",
+ " clear = gr.Button(\"Clear\")\n",
+ "\n",
+ " def process_file_and_generate(file_path, history):\n",
+ " transcribed_text = transcribe_audio(file_path)\n",
+ " minutes = generate_minutes(transcribed_text)\n",
+ " new_history = history + [[f\"Transcription of '{os.path.basename(file_path)}':\\n{transcribed_text}\", minutes]]\n",
+ " return new_history\n",
+ "\n",
+ " def process_audio_and_generate(audio_file, history):\n",
+ " transcribed_text = transcribe_audio(audio_file)\n",
+ " minutes = generate_minutes(transcribed_text)\n",
+ " new_history = history + [[f\"Transcription of your recording:\\n{transcribed_text}\", minutes]]\n",
+ " return new_history\n",
+ "\n",
+ "\n",
+ " submit_btn.click(\n",
+ " process_file_and_generate,\n",
+ " inputs=[entry, chatbot],\n",
+ " outputs=[chatbot],\n",
+ " queue=False\n",
+ " )\n",
+ "\n",
+ " submit_audio_btn.click(\n",
+ " process_audio_and_generate,\n",
+ " inputs=[audio_input, chatbot],\n",
+ " outputs=[chatbot],\n",
+ " queue=False\n",
+ " )\n",
+ "\n",
+ " clear.click(lambda: None, inputs=None, outputs=[chatbot], queue=False)\n",
+ "\n",
+ "ui.launch(inbrowser=True, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd2020d3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week3/community-contributions/rwothoromo/week3day5task.ipynb b/week3/community-contributions/rwothoromo/week3day5task.ipynb
new file mode 100644
index 0000000..7da7365
--- /dev/null
+++ b/week3/community-contributions/rwothoromo/week3day5task.ipynb
@@ -0,0 +1,226 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "18b82c6b-10dc-4d94-b8dc-592ff011ce2b",
+ "metadata": {},
+ "source": [
+ "# Meeting minutes creator\n",
+ "\n",
+ "https://colab.research.google.com/drive/13wR4Blz3Ot_x0GOpflmvvFffm5XU3Kct?usp=sharing\n",
+ "\n",
+ "## **Week 3 task.**\n",
+ "Create your own tool that generates synthetic data/test data. Input the type of dataset or products or job postings, etc. and let the tool dream up various data samples.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9289ba7-200c-43a9-b67a-c5ce826c9537",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import gradio as gr, requests, json, time, os, torch\n",
+ "from transformers import pipeline, set_seed\n",
+ "from functools import partial\n",
+ "from openai import OpenAI, APIError, AuthenticationError\n",
+ "from google.colab import drive, userdata\n",
+ "from huggingface_hub import login\n",
+ "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
+ "\n",
+ "# Sample user_prompt = \"a list of student profiles with full name, email, course studied, and GPA for each of 6 semesters, and a CGPA for the 6 semesters\"\n",
+ "\n",
+ "# Sign in to HuggingFace Hub\n",
+ "hf_token = userdata.get('HF_TOKEN')\n",
+ "login(hf_token, add_to_git_credential=True)\n",
+ "\n",
+ "# Sign in to OpenAI using Secrets in Colab\n",
+ "openai_api_key = userdata.get('OPENAI_API_KEY')\n",
+ "\n",
+ "# Initialize client\n",
+ "try:\n",
+ " openai = OpenAI(api_key=openai_api_key)\n",
+ "except Exception as e:\n",
+ " openai = None\n",
+ " print(f\"OpenAI client not initialized: {e}\")\n",
+ "\n",
+ "# Constants\n",
+ "GPT_MODEL = \"gpt-3.5-turbo\"\n",
+ "\n",
+ "# Local Llama Model Setup\n",
+ "# Loads a Llama model from Hugging Face for local inference.\n",
+ "# Note: This requires a powerful GPU and specific library installations (e.g., bitsandbytes, accelerate).\n",
+ "LLAMA_MODEL = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
+ "\n",
+ "try:\n",
+ " # Set up quantization config for efficient memory usage.\n",
+ " # This loads the model in 4-bit precision, significantly reducing VRAM requirements.\n",
+ " quant_config = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16,\n",
+ " bnb_4bit_quant_type=\"nf4\"\n",
+ " )\n",
+ "\n",
+ " # Load the tokenizer and model.\n",
+ " tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL)\n",
+ " model = AutoModelForCausalLM.from_pretrained(\n",
+ " LLAMA_MODEL, \n",
+ " device_map=\"auto\", \n",
+ " quantization_config=quant_config,\n",
+ " trust_remote_code=True\n",
+ " )\n",
+ " \n",
+ " # Set the model to evaluation mode for inference.\n",
+ " model.eval()\n",
+ "\n",
+ "except Exception as e:\n",
+ " model = None\n",
+ " tokenizer = None\n",
+ " print(f\"Failed to load local Llama model: {e}\")\n",
+ "\n",
+ "\n",
+ "def generate_with_llama(user_prompt: str, num_samples: int = 5):\n",
+ " \"\"\"\n",
+ " Generates synthetic data using a local Llama model.\n",
+ " Return a JSON string.\n",
+ " \"\"\"\n",
+ " if not model or not tokenizer:\n",
+ " return json.dumps({\"error\": \"Llama model not loaded. Check model paths and hardware compatibility.\"}, indent=2)\n",
+ "\n",
+ " # Llama 3.1 uses a specific chat template for conversation formatting.\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": f\"You are a data generation assistant. Generate a JSON array of exactly {num_samples} objects based on the user's request. The output must be valid JSON only, without any other text or formatting.\"},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ]\n",
+ "\n",
+ " try:\n",
+ " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ " outputs = model.generate(inputs, max_new_tokens=2000, do_sample=True, top_p=0.9, temperature=0.7)\n",
+ "\n",
+ " # Decode the generated tokens.\n",
+ " response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ " # Extract only the assistant's part from the complete chat history.\n",
+ " assistant_start = \"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n",
+ " if assistant_start in response_text:\n",
+ " response_text = response_text.split(assistant_start)[-1]\n",
+ " \n",
+ " # Parse the JSON and return it.\n",
+ " parsed_json = json.loads(response_text)\n",
+ " return json.dumps(parsed_json, indent=2)\n",
+ "\n",
+ " except Exception as e:\n",
+ " return json.dumps({\"error\": f\"An error occurred during local model generation: {e}\"}, indent=2)\n",
+ "\n",
+ "\n",
+ "\n",
+ "def generate_with_gpt(user_prompt: str, num_samples: int = 5):\n",
+ " \"\"\"\n",
+ " Generates synthetic data using OpenAI's GPT.\n",
+ " Return a JSON string.\n",
+ " \"\"\"\n",
+ " if not openai:\n",
+ " return json.dumps({\"error\": \"OpenAI client not initialized. Please check your API key.\"}, indent=2)\n",
+ "\n",
+ " try:\n",
+ " response = openai.chat.completions.create(\n",
+ " model=GPT_MODEL,\n",
+ " messages=[\n",
+ " {\"role\": \"system\", \"content\": f\"You are a data generation assistant. Generate a JSON array of exactly {num_samples} objects based on the user's request. The output must be valid JSON only, without any other text or formatting.\"},\n",
+ " {\"role\": \"user\", \"content\": user_prompt}\n",
+ " ],\n",
+ " response_format={\"type\": \"json_object\"}\n",
+ " )\n",
+ " \n",
+ " json_text = response.choices[0].message.content\n",
+ " return json_text\n",
+ " except APIError as e:\n",
+ " return json.dumps({\"error\": f\"Error from OpenAI API: {e.body}\"}, indent=2)\n",
+ " except Exception as e:\n",
+ " return json.dumps({\"error\": f\"An unexpected error occurred: {e}\"}, indent=2)\n",
+ "\n",
+ "\n",
+ "def generate_data(user_prompt, model_choice):\n",
+ " \"\"\"\n",
+ " Wrapper function that calls the appropriate generation function based on model choice.\n",
+ " \"\"\"\n",
+ " if not user_prompt:\n",
+ " return json.dumps({\"error\": \"Please provide a description for the data.\"}, indent=2)\n",
+ "\n",
+ " if model_choice == f\"Hugging Face ({LLAMA_MODEL})\":\n",
+ " return generate_with_llama(user_prompt)\n",
+ " elif model_choice == f\"OpenAI ({GPT_MODEL})\":\n",
+ " return generate_with_gpt(user_prompt)\n",
+ " else:\n",
+ " return json.dumps({\"error\": \"Invalid model choice.\"}, indent=2)\n",
+ "\n",
+ "# Gradio UI\n",
+ "with gr.Blocks(theme=gr.themes.Soft(), title=\"Synthetic Data Generator\") as ui:\n",
+ " gr.Markdown(\"# Synthetic Data Generator\")\n",
+ " gr.Markdown(\"Describe the type of data you need, select a model, and click 'Generate'.\")\n",
+ "\n",
+ " with gr.Row():\n",
+ " with gr.Column(scale=3):\n",
+ " data_prompt = gr.Textbox(\n",
+ " lines=5,\n",
+ " label=\"Data Prompt\",\n",
+ " placeholder=\"e.g., a list of customer profiles with name, email, and a favorite product\"\n",
+ " )\n",
+ " \n",
+ " with gr.Column(scale=1):\n",
+ " model_choice = gr.Radio(\n",
+ " [f\"Hugging Face ({LLAMA_MODEL})\", f\"OpenAI ({GPT_MODEL})\"],\n",
+ " label=\"Choose a Model\",\n",
+ " value=f\"Hugging Face ({LLAMA_MODEL})\"\n",
+ " )\n",
+ " \n",
+ " generate_btn = gr.Button(\"Generate Data\")\n",
+ " \n",
+ " with gr.Row():\n",
+ " output_json = gr.JSON(label=\"Generated Data\")\n",
+ " \n",
+ " # Click trigger\n",
+ " generate_btn.click(\n",
+ " fn=generate_data,\n",
+ " inputs=[data_prompt, model_choice],\n",
+ " outputs=output_json\n",
+ " )\n",
+ "\n",
+ "ui.launch(inbrowser=True, debug=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd2020d3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week3/community-contributions/week3_exercise_by_abrar.ipynb b/week3/community-contributions/week3_exercise_by_abrar.ipynb
new file mode 100644
index 0000000..6339e85
--- /dev/null
+++ b/week3/community-contributions/week3_exercise_by_abrar.ipynb
@@ -0,0 +1,49 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "61f56afc-bc15-46a4-8eb1-d940c332cf52",
+ "metadata": {},
+ "source": [
+ "# Synthetic Data Studio\n",
+ "\n",
+ "It includes useful code to connect your Google Drive to your Colab and Hugging Face Model. \n",
+ "To use the Huggingface Model, you first need to create an account on HuggingFace and then send a request to the required model. \n",
+ "\n",
+ "\n",
+ "https://colab.research.google.com/drive/1W3JPe0F6su8sNpfdlp2uqXqWZChevHYa?usp=sharing\n",
+ "\n",
+ "This should run nicely on a low-cost or free T4 box."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9289ba7-200c-43a9-b67a-c5ce826c9537",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week4/community-contributions/unit-test-generator-v3.ipynb b/week4/community-contributions/unit-test-generator-v3.ipynb
new file mode 100644
index 0000000..c47b6d0
--- /dev/null
+++ b/week4/community-contributions/unit-test-generator-v3.ipynb
@@ -0,0 +1,222 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "56957b7f-e289-4999-8a40-ce1a8378d8cd",
+ "metadata": {},
+ "source": [
+ "# Unit Test Generator\n",
+ "\n",
+ "The requirement: use a Frontier model to generate fast and repeatable unit tests for Python code.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3ef67ef0-1bda-45bb-abca-f003217602d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os\n",
+ "import io\n",
+ "import sys\n",
+ "import ast\n",
+ "import unittest, contextlib\n",
+ "from dotenv import load_dotenv\n",
+ "from openai import OpenAI\n",
+ "import google.generativeai\n",
+ "import anthropic\n",
+ "from IPython.display import Markdown, display, update_display\n",
+ "import gradio as gr\n",
+ "import subprocess\n",
+ "\n",
+ "# environment\n",
+ "\n",
+ "load_dotenv(override=True)\n",
+ "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
+ "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
+ "\n",
+ "openai = OpenAI()\n",
+ "claude = anthropic.Anthropic()\n",
+ "OPENAI_MODEL = \"gpt-4o\"\n",
+ "CLAUDE_MODEL = \"claude-3-7-sonnet-20250219\"\n",
+ "\n",
+ "system_message = \"You are an assistant that implements unit testing for Python code. \"\n",
+ "system_message += \"Respond only with Python code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n",
+ "\n",
+ "def remove_main_block_from_code(code):\n",
+ " \"\"\"\n",
+ " Remove top-level `if __name__ == \"__main__\":` blocks from code.\n",
+ " \"\"\"\n",
+ " try:\n",
+ " tree = ast.parse(code)\n",
+ "\n",
+ " class RemoveMain(ast.NodeTransformer):\n",
+ " def visit_If(self, node):\n",
+ " # check if this is: if __name__ == \"__main__\":\n",
+ " test = node.test\n",
+ " if (\n",
+ " isinstance(test, ast.Compare) and\n",
+ " isinstance(test.left, ast.Name) and\n",
+ " test.left.id == \"__name__\" and\n",
+ " len(test.ops) == 1 and isinstance(test.ops[0], ast.Eq) and\n",
+ " len(test.comparators) == 1 and\n",
+ " isinstance(test.comparators[0], ast.Constant) and\n",
+ " test.comparators[0].value == \"__main__\"\n",
+ " ):\n",
+ " return None # remove this node entirely\n",
+ " return node\n",
+ "\n",
+ " tree = RemoveMain().visit(tree)\n",
+ " ast.fix_missing_locations(tree)\n",
+ " return ast.unparse(tree) # get back code as string\n",
+ " except Exception as e:\n",
+ " print(\"Error removing __main__ block:\", e)\n",
+ " return code # fallback: return original code if AST fails\n",
+ "\n",
+ "def user_prompt_for(python_file):\n",
+ " if isinstance(python_file, dict): # from Gradio\n",
+ " file_path = python_file[\"name\"]\n",
+ " elif hasattr(python_file, \"name\"): # tempfile\n",
+ " file_path = python_file.name\n",
+ " else: # string path\n",
+ " file_path = python_file\n",
+ "\n",
+ " with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
+ " python_code = f.read()\n",
+ "\n",
+ " # strip __main__ blocks\n",
+ " python_code = remove_main_block_from_code(python_code)\n",
+ "\n",
+ " user_prompt = \"Write unit tests for this Python code. \"\n",
+ " user_prompt += \"Respond only with Python code; do not explain your work other than a few comments. \"\n",
+ " user_prompt += \"The unit testing is done in Jupyterlab, so you should use packages that play nicely with the Jupyter kernel. \\n\\n\"\n",
+ " user_prompt += \"Include the original Python code in your generated output so that I can run all in one fell swoop.\\n\\n\"\n",
+ " user_prompt += python_code\n",
+ "\n",
+ " return user_prompt\n",
+ "\n",
+ "def messages_for(python_file):\n",
+ " return [\n",
+ " {\"role\": \"system\", \"content\": system_message},\n",
+ " {\"role\": \"user\", \"content\": user_prompt_for(python_file)}\n",
+ " ]\n",
+ "\t\n",
+ "def stream_gpt(python_file): \n",
+ " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python_file), stream=True)\n",
+ " reply = \"\"\n",
+ " for chunk in stream:\n",
+ " fragment = chunk.choices[0].delta.content or \"\"\n",
+ " reply += fragment\n",
+ " yield reply.replace('```python\\n','').replace('```','')\n",
+ "\t\t\n",
+ "def stream_claude(python_file):\n",
+ " result = claude.messages.stream(\n",
+ " model=CLAUDE_MODEL,\n",
+ " max_tokens=2000,\n",
+ " system=system_message,\n",
+ " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python_file)}],\n",
+ " )\n",
+ " reply = \"\"\n",
+ " with result as stream:\n",
+ " for text in stream.text_stream:\n",
+ " reply += text\n",
+ " yield reply.replace('```python\\n','').replace('```','')\n",
+ "\t\t\t\n",
+ "def unit_test(python_file, model):\n",
+ " if model==\"GPT\":\n",
+ " result = stream_gpt(python_file)\n",
+ " elif model==\"Claude\":\n",
+ " result = stream_claude(python_file)\n",
+ " else:\n",
+ " raise ValueError(\"Unknown model\")\n",
+ " for stream_so_far in result:\n",
+ " yield stream_so_far\n",
+ "\n",
+ "def execute_python(code):\n",
+ " buffer = io.StringIO()\n",
+ " try:\n",
+ " with contextlib.redirect_stdout(buffer), contextlib.redirect_stderr(buffer):\n",
+ " # execute code in isolated namespace\n",
+ " ns = {}\n",
+ " exec(code, ns)\n",
+ "\n",
+ " # manually collect TestCase subclasses\n",
+ " test_cases = [\n",
+ " obj for obj in ns.values()\n",
+ " if isinstance(obj, type) and issubclass(obj, unittest.TestCase)\n",
+ " ]\n",
+ " if test_cases:\n",
+ " suite = unittest.TestSuite()\n",
+ " for case in test_cases:\n",
+ " suite.addTests(unittest.defaultTestLoader.loadTestsFromTestCase(case))\n",
+ " runner = unittest.TextTestRunner(stream=buffer, verbosity=2)\n",
+ " runner.run(suite)\n",
+ " except Exception as e:\n",
+ " print(f\"Error during execution: {e}\", file=buffer)\n",
+ "\n",
+ " return buffer.getvalue()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "670b8b78-0b13-488a-9533-59802b2fe101",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# --- Gradio UI ---\n",
+ "with gr.Blocks() as ui:\n",
+ " gr.Markdown(\"## Unit Test Generator\\nUpload a Python file and get structured unit testing.\")\n",
+ " with gr.Row(): # Row 1\n",
+ " orig_code = gr.File(label=\"Upload your Python file\", file_types=[\".py\"])\n",
+ " test_code = gr.Textbox(label=\"Unit test code:\", lines=10)\n",
+ " with gr.Row(): # Row 2\n",
+ " model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
+ " with gr.Row(): # Row 3\n",
+ " generate = gr.Button(\"Generate unit test code\")\n",
+ " with gr.Row(): # Row 4\n",
+ " unit_run = gr.Button(\"Run Python unit test\")\n",
+ " with gr.Row(): # Row 5\n",
+ " test_out = gr.Textbox(label=\"Unit test result:\", lines=10)\n",
+ "\n",
+ " generate.click(unit_test, inputs=[orig_code, model], outputs=[test_code])\n",
+ "\n",
+ " unit_run.click(execute_python, inputs=[test_code], outputs=[test_out])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "609bbdc3-1e1c-4538-91dd-7d13134ab381",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ui.launch(inbrowser=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week5/community-contributions/RAG-based-academic-assistant-v3.ipynb b/week5/community-contributions/RAG-based-academic-assistant-v3.ipynb
new file mode 100644
index 0000000..7899ff8
--- /dev/null
+++ b/week5/community-contributions/RAG-based-academic-assistant-v3.ipynb
@@ -0,0 +1,409 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "97a93fee-6bbd-477b-aba8-577d318a9f9d",
+ "metadata": {},
+ "source": [
+ "# AI-Powered Academic Knowledge Assistant\n",
+ "AI-powered RAG (Retrieval-Augmented Generation) system that transforms document collections into queryable knowledge bases using OpenAI embeddings and vector search. Features configurable chunking, file size limits, and retrieval parameters with a Gradio interface for processing PDFs and generating contextually-aware responses via LangChain and ChromaDB."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3589eee0-ce34-42f4-b538-b43f3b0d9f6f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import glob\n",
+ "from dotenv import load_dotenv\n",
+ "import gradio as gr\n",
+ "import shutil\n",
+ "import tiktoken\n",
+ "import time\n",
+ "import uuid\n",
+ "from typing import List, Tuple, Optional\n",
+ "\n",
+ "# imports for langchain and Chroma\n",
+ "from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader\n",
+ "from langchain.text_splitter import CharacterTextSplitter\n",
+ "from langchain.schema import Document\n",
+ "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n",
+ "from langchain_chroma import Chroma\n",
+ "from langchain.memory import ConversationBufferMemory\n",
+ "from langchain.chains import ConversationalRetrievalChain\n",
+ "from langchain.embeddings import HuggingFaceEmbeddings\n",
+ "\n",
+ "from langchain_community.document_loaders import PyPDFLoader, TextLoader\n",
+ "from langchain.docstore.document import Document\n",
+ "\n",
+ "# Load environment variables\n",
+ "load_dotenv(override=True)\n",
+ "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
+ "\n",
+ "# Global variables to store the current setup\n",
+ "current_vectorstore = None\n",
+ "current_conversation_chain = None\n",
+ "processing_status = \"\"\n",
+ "\n",
+ "def count_tokens(text: str, model: str = \"gpt-4o-mini\") -> int:\n",
+ " \"\"\"Count tokens in text using tiktoken\"\"\"\n",
+ " try:\n",
+ " encoding = tiktoken.encoding_for_model(model)\n",
+ " return len(encoding.encode(text))\n",
+ " except:\n",
+ " # Fallback estimation: roughly 4 characters per token\n",
+ " return len(text) // 4\n",
+ "\n",
+ "def filter_chunks_by_tokens(chunks: List[Document], max_total_tokens: int = 250000) -> List[Document]:\n",
+ " \"\"\"Filter chunks to stay within token limits\"\"\"\n",
+ " filtered_chunks = []\n",
+ " total_tokens = 0\n",
+ " \n",
+ " for chunk in chunks:\n",
+ " chunk_tokens = count_tokens(chunk.page_content)\n",
+ " \n",
+ " # Skip individual chunks that are too large (shouldn't happen with proper splitting)\n",
+ " if chunk_tokens > 8000: # Individual chunk limit\n",
+ " continue\n",
+ " \n",
+ " if total_tokens + chunk_tokens <= max_total_tokens:\n",
+ " filtered_chunks.append(chunk)\n",
+ " total_tokens += chunk_tokens\n",
+ " else:\n",
+ " break\n",
+ " \n",
+ " return filtered_chunks\n",
+ "\n",
+ "def add_metadata(doc, doc_type, file_path):\n",
+ " \"\"\"Add metadata including document type and file information\"\"\"\n",
+ " doc.metadata[\"doc_type\"] = doc_type\n",
+ " doc.metadata[\"file_path\"] = file_path\n",
+ " doc.metadata[\"file_name\"] = os.path.basename(file_path)\n",
+ " return doc\n",
+ "\n",
+ "def check_file_size(file_path, max_size_bytes):\n",
+ " \"\"\"Check if file size is within the limit\"\"\"\n",
+ " try:\n",
+ " file_size = os.path.getsize(file_path)\n",
+ " return file_size <= max_size_bytes, file_size\n",
+ " except OSError:\n",
+ " return False, 0\n",
+ "\n",
+ "def load_pdfs_with_size_limit(folder_path, doc_type, max_size_bytes):\n",
+ " \"\"\"Load PDF files from a folder with size restrictions\"\"\"\n",
+ " pdf_files = glob.glob(os.path.join(folder_path, \"**/*.pdf\"), recursive=True)\n",
+ " loaded_docs = []\n",
+ " skipped_files = []\n",
+ " \n",
+ " for pdf_file in pdf_files:\n",
+ " is_valid_size, file_size = check_file_size(pdf_file, max_size_bytes)\n",
+ " \n",
+ " if is_valid_size:\n",
+ " try:\n",
+ " loader = PyPDFLoader(pdf_file)\n",
+ " docs = loader.load()\n",
+ " docs_with_metadata = [add_metadata(doc, doc_type, pdf_file) for doc in docs]\n",
+ " loaded_docs.extend(docs_with_metadata)\n",
+ " except Exception as e:\n",
+ " skipped_files.append((pdf_file, f\"Loading error: {str(e)}\"))\n",
+ " else:\n",
+ " file_size_mb = file_size / 1024 / 1024\n",
+ " skipped_files.append((pdf_file, f\"File too large: {file_size_mb:.2f} MB\"))\n",
+ " \n",
+ " return loaded_docs, skipped_files\n",
+ "\n",
+ "def process_documents(knowledge_base_dir: str, max_file_size_mb: float, chunk_size: int, chunk_overlap: int) -> Tuple[str, str]:\n",
+ " \"\"\"Process documents and create vector store\"\"\"\n",
+ " global current_vectorstore, current_conversation_chain\n",
+ " \n",
+ " try:\n",
+ " # Validate directory\n",
+ " if not knowledge_base_dir or not knowledge_base_dir.strip():\n",
+ " return \"โ Error: Please enter a directory path!\", \"\"\n",
+ " \n",
+ " directory_path = knowledge_base_dir.strip()\n",
+ " \n",
+ " if not os.path.exists(directory_path):\n",
+ " return \"โ Error: Directory does not exist! Please check the path.\", \"\"\n",
+ " \n",
+ " # Configuration\n",
+ " MAX_FILE_SIZE_BYTES = int(max_file_size_mb * 1024 * 1024)\n",
+ " \n",
+ " # Find folders\n",
+ " if directory_path.endswith('*'):\n",
+ " folders = glob.glob(directory_path)\n",
+ " else:\n",
+ " folders = glob.glob(os.path.join(directory_path, \"*\"))\n",
+ " \n",
+ " if not folders:\n",
+ " return \"โ Error: No folders found in the specified directory!\", \"\"\n",
+ " \n",
+ " # Process documents\n",
+ " documents = []\n",
+ " all_skipped_files = []\n",
+ " status_lines = []\n",
+ " \n",
+ " status_lines.append(f\"๐ Processing folders with {max_file_size_mb} MB file size limit...\")\n",
+ " status_lines.append(\"-\" * 60)\n",
+ " \n",
+ " for folder in folders:\n",
+ " if os.path.isdir(folder):\n",
+ " doc_type = os.path.basename(folder)\n",
+ " status_lines.append(f\"๐ Processing folder: {doc_type}\")\n",
+ " \n",
+ " folder_docs, skipped_files = load_pdfs_with_size_limit(folder, doc_type, MAX_FILE_SIZE_BYTES)\n",
+ " documents.extend(folder_docs)\n",
+ " all_skipped_files.extend(skipped_files)\n",
+ " \n",
+ " if folder_docs:\n",
+ " status_lines.append(f\" โ
Loaded {len(folder_docs)} document pages\")\n",
+ " if skipped_files:\n",
+ " status_lines.append(f\" โ ๏ธ Skipped {len(skipped_files)} files\")\n",
+ " \n",
+ " if not documents:\n",
+ " error_msg = \"โ No PDF documents were loaded successfully.\"\n",
+ " if all_skipped_files:\n",
+ " error_msg += f\"\\n\\nAll {len(all_skipped_files)} files were skipped:\"\n",
+ " for file_path, reason in all_skipped_files[:10]: # Show first 10\n",
+ " error_msg += f\"\\n โข {os.path.basename(file_path)}: {reason}\"\n",
+ " if len(all_skipped_files) > 10:\n",
+ " error_msg += f\"\\n ... and {len(all_skipped_files) - 10} more\"\n",
+ " return error_msg, \"\"\n",
+ " \n",
+ " # Text splitting\n",
+ " status_lines.append(\"\\n\" + \"=\"*40)\n",
+ " status_lines.append(\"โ๏ธ TEXT SPLITTING\")\n",
+ " status_lines.append(\"=\"*40)\n",
+ " \n",
+ " text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
+ " chunks = text_splitter.split_documents(documents)\n",
+ " \n",
+ " # Filter chunks by token count to prevent API errors\n",
+ " status_lines.append(\"๐ข Checking token limits...\")\n",
+ " original_chunk_count = len(chunks)\n",
+ " chunks = filter_chunks_by_tokens(chunks, max_total_tokens=250000)\n",
+ " \n",
+ " if len(chunks) < original_chunk_count:\n",
+ " status_lines.append(f\"โ ๏ธ Filtered from {original_chunk_count} to {len(chunks)} chunks to stay within token limits\")\n",
+ " \n",
+ " # Create vectorstore\n",
+ " status_lines.append(\"๐งฎ Creating vector embeddings...\")\n",
+ " embeddings = OpenAIEmbeddings()\n",
+ " \n",
+ " # Use a temporary database name\n",
+ " db_name = \"temp_vector_db\"\n",
+ " \n",
+ " # Delete if already exists\n",
+ " if os.path.exists(db_name):\n",
+ " shutil.rmtree(db_name)\n",
+ " \n",
+ " # Create vectorstore\n",
+ " vectorstore = Chroma.from_documents(\n",
+ " documents=chunks, \n",
+ " embedding=embeddings, \n",
+ " persist_directory=db_name\n",
+ " )\n",
+ " \n",
+ " # Update global variables\n",
+ " current_vectorstore = vectorstore\n",
+ " \n",
+ " # Create conversation chain\n",
+ " llm = ChatOpenAI(temperature=0.7, model_name=\"gpt-4o-mini\")\n",
+ " memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)\n",
+ " retriever = vectorstore.as_retriever(search_kwargs={\"k\": 25})\n",
+ " current_conversation_chain = ConversationalRetrievalChain.from_llm(\n",
+ " llm=llm, \n",
+ " retriever=retriever, \n",
+ " memory=memory\n",
+ " )\n",
+ " \n",
+ " # Summary statistics\n",
+ " status_lines.append(\"\\n\" + \"=\"*40)\n",
+ " status_lines.append(\"๐ SUMMARY\")\n",
+ " status_lines.append(\"=\"*40)\n",
+ " status_lines.append(f\"โ
Total PDFs processed: {len(set(doc.metadata['file_path'] for doc in documents))}\")\n",
+ " status_lines.append(f\"๐ Total document pages: {len(documents)}\")\n",
+ " status_lines.append(f\"๐งฉ Total text chunks: {len(chunks)}\")\n",
+ " status_lines.append(f\"๐ Document types: {', '.join(set(doc.metadata['doc_type'] for doc in documents))}\")\n",
+ " status_lines.append(f\"๐๏ธ Vector store size: {vectorstore._collection.count()} embeddings\")\n",
+ " \n",
+ " if all_skipped_files:\n",
+ " status_lines.append(f\"\\nโ ๏ธ Skipped files: {len(all_skipped_files)}\")\n",
+ " for file_path, reason in all_skipped_files[:5]: # Show first 5\n",
+ " status_lines.append(f\" โข {os.path.basename(file_path)}: {reason}\")\n",
+ " if len(all_skipped_files) > 5:\n",
+ " status_lines.append(f\" ... and {len(all_skipped_files) - 5} more\")\n",
+ " \n",
+ " success_msg = \"โ
Knowledge base successfully created and ready for questions!\"\n",
+ " detailed_status = \"\\n\".join(status_lines)\n",
+ " \n",
+ " return success_msg, detailed_status\n",
+ " \n",
+ " except Exception as e:\n",
+ " error_msg = f\"โ Error processing documents: {str(e)}\"\n",
+ " return error_msg, \"\"\n",
+ "\n",
+ "def chat_with_documents(message, history, num_chunks):\n",
+ " \"\"\"Chat with the processed documents\"\"\"\n",
+ " global current_conversation_chain, current_vectorstore\n",
+ " \n",
+ " if current_conversation_chain is None:\n",
+ " return \"โ Please process documents first before asking questions!\"\n",
+ " \n",
+ " try:\n",
+ " # Update retriever with new chunk count\n",
+ " if current_vectorstore is not None:\n",
+ " retriever = current_vectorstore.as_retriever(search_kwargs={\"k\": num_chunks})\n",
+ " current_conversation_chain.retriever = retriever\n",
+ " \n",
+ " result = current_conversation_chain.invoke({\"question\": message})\n",
+ " return result[\"answer\"]\n",
+ " \n",
+ " except Exception as e:\n",
+ " return f\"โ Error generating response: {str(e)}\"\n",
+ "\n",
+ "def reset_conversation():\n",
+ " \"\"\"Reset the conversation memory\"\"\"\n",
+ " global current_conversation_chain\n",
+ " if current_conversation_chain is not None:\n",
+ " current_conversation_chain.memory.clear()\n",
+ " return \"โ
Conversation history cleared!\"\n",
+ " return \"No active conversation to reset.\"\n",
+ "\n",
+ "# Create Gradio Interface\n",
+ "with gr.Blocks(title=\"AI-Powered Academic Knowledge Assistant\", theme=gr.themes.Soft()) as app:\n",
+ " gr.Markdown(\"# ๐ AI-Powered Academic Knowledge Assistant\")\n",
+ " gr.Markdown(\"Transform your entire document library into an intelligent, searchable AI tutor that answers questions instantly.\")\n",
+ " \n",
+ " with gr.Tabs():\n",
+ " # Configuration Tab\n",
+ " with gr.Tab(\"โ๏ธ Configuration\"):\n",
+ " gr.Markdown(\"### ๐ Document Processing Settings\")\n",
+ " \n",
+ " gr.Markdown(\"๐ก **Tip:** Copy and paste your folder path here. On mobile, you can use file manager apps to copy folder paths.\")\n",
+ " \n",
+ " with gr.Row():\n",
+ " with gr.Column():\n",
+ " knowledge_dir = gr.Textbox(\n",
+ " label=\"Knowledge Base Directory\",\n",
+ " value=r\"C:\\Users\\Documents\\Syllabi\\Georgia Tech\\Spring 22\\Microwave Design\",\n",
+ " placeholder=\"Enter or paste your document directory path\",\n",
+ " lines=1\n",
+ " )\n",
+ " \n",
+ " max_file_size = gr.Slider(\n",
+ " label=\"Max File Size (MB)\",\n",
+ " minimum=0.5,\n",
+ " maximum=50,\n",
+ " value=4,\n",
+ " step=0.5\n",
+ " )\n",
+ " \n",
+ " with gr.Column():\n",
+ " chunk_size = gr.Slider(\n",
+ " label=\"Chunk Size (characters)\",\n",
+ " minimum=200,\n",
+ " maximum=1500,\n",
+ " value=800,\n",
+ " step=100,\n",
+ " info=\"Smaller chunks = better token management\"\n",
+ " )\n",
+ " \n",
+ " chunk_overlap = gr.Slider(\n",
+ " label=\"Chunk Overlap (characters)\",\n",
+ " minimum=0,\n",
+ " maximum=300,\n",
+ " value=150,\n",
+ " step=25,\n",
+ " info=\"Overlap preserves context between chunks\"\n",
+ " )\n",
+ " \n",
+ " process_btn = gr.Button(\"๐ Process Documents\", variant=\"primary\", size=\"lg\")\n",
+ " \n",
+ " with gr.Row():\n",
+ " status_output = gr.Textbox(\n",
+ " label=\"Status\",\n",
+ " lines=2,\n",
+ " max_lines=2\n",
+ " )\n",
+ " \n",
+ " detailed_output = gr.Textbox(\n",
+ " label=\"Detailed Processing Log\",\n",
+ " lines=15,\n",
+ " max_lines=20\n",
+ " )\n",
+ " \n",
+ " # Chat Tab\n",
+ " with gr.Tab(\"๐ฌ Chat\"):\n",
+ " gr.Markdown(\"### ๐ค Ask Questions About Your Documents\")\n",
+ " \n",
+ " with gr.Row():\n",
+ " with gr.Column(scale=1):\n",
+ " num_chunks = gr.Slider(\n",
+ " label=\"Number of chunks to retrieve\",\n",
+ " minimum=1,\n",
+ " maximum=50,\n",
+ " value=25,\n",
+ " step=1\n",
+ " )\n",
+ " \n",
+ " reset_btn = gr.Button(\"๐๏ธ Clear Chat History\", variant=\"secondary\")\n",
+ " reset_output = gr.Textbox(label=\"Reset Status\", lines=1)\n",
+ " \n",
+ " with gr.Column(scale=3):\n",
+ " chatbot = gr.ChatInterface(\n",
+ " fn=lambda msg, history: chat_with_documents(msg, history, num_chunks.value),\n",
+ " type=\"messages\",\n",
+ " title=\"Academic Assistant Chat\",\n",
+ " description=\"Ask questions about your processed documents\"\n",
+ " )\n",
+ " \n",
+ " # Event handlers\n",
+ " process_btn.click(\n",
+ " fn=process_documents,\n",
+ " inputs=[knowledge_dir, max_file_size, chunk_size, chunk_overlap],\n",
+ " outputs=[status_output, detailed_output]\n",
+ " )\n",
+ " \n",
+ " reset_btn.click(\n",
+ " fn=reset_conversation,\n",
+ " outputs=reset_output\n",
+ " )\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9eb807e0-194b-48dd-a1e9-b1b9b8a99620",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "app.launch(share=True, inbrowser=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week5/community-contributions/RAG_Using_Gemini b/week5/community-contributions/RAG_Using_Gemini
new file mode 100644
index 0000000..73709ba
--- /dev/null
+++ b/week5/community-contributions/RAG_Using_Gemini
@@ -0,0 +1 @@
+Testing
diff --git a/week6/day5.ipynb b/week6/day5.ipynb
index 5d5619e..14abeab 100644
--- a/week6/day5.ipynb
+++ b/week6/day5.ipynb
@@ -149,7 +149,7 @@
"source": [
"# First let's work on a good prompt for a Frontier model\n",
"# Notice that I'm removing the \" to the nearest dollar\"\n",
- "# When we train our own models, we'll need to make the problem as easy as possible, \n",
+ "# When we train our own models, we'll need to make the problem as easy as possible,\n",
"# but a Frontier model needs no such simplification.\n",
"\n",
"def messages_for(item):\n",
@@ -393,6 +393,22 @@
"openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b19ea9e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import wandb\n",
+ "from wandb.integration.openai.fine_tuning import WandbLogger\n",
+ "\n",
+ "# Log in to Weights & Biases.\n",
+ "wandb.login()\n",
+ "# Sync the fine-tuning job with Weights & Biases.\n",
+ "WandbLogger.sync(fine_tune_job_id=job_id, project=\"gpt-pricer\")"
+ ]
+ },
{
"cell_type": "markdown",
"id": "066fef03-8338-4526-9df3-89b649ad4f0a",
@@ -490,7 +506,7 @@
"\n",
"def gpt_fine_tuned(item):\n",
" response = openai.chat.completions.create(\n",
- " model=fine_tuned_model_name, \n",
+ " model=fine_tuned_model_name,\n",
" messages=messages_for(item),\n",
" seed=42,\n",
" max_tokens=7\n",