From 7b37c207dcc517c96a84d1e9237b64995c27a414 Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Fri, 6 Jun 2025 00:22:36 +0200 Subject: [PATCH 01/25] Create README.MD This folder contains a referral letter generator for general practitioners. --- README.MD | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.MD diff --git a/README.MD b/README.MD new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/README.MD @@ -0,0 +1 @@ + From 5c4846f9a4af860e40f589b6d6751efe42f2724f Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Fri, 6 Jun 2025 00:35:00 +0200 Subject: [PATCH 02/25] Delete README.MD --- README.MD | 1 - 1 file changed, 1 deletion(-) delete mode 100644 README.MD diff --git a/README.MD b/README.MD deleted file mode 100644 index 8b13789..0000000 --- a/README.MD +++ /dev/null @@ -1 +0,0 @@ - From 8aa8e9df3fac5c1f484c92fc81d9be3220021dbe Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Fri, 6 Jun 2025 00:35:33 +0200 Subject: [PATCH 03/25] Create README.md This folder contains the GP referral automation notebook and example data. --- gp_referral_toolkit/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 gp_referral_toolkit/README.md diff --git a/gp_referral_toolkit/README.md b/gp_referral_toolkit/README.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/gp_referral_toolkit/README.md @@ -0,0 +1 @@ + From bace59f98aa50579313f32f32d2527468e8842f1 Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Fri, 6 Jun 2025 00:37:20 +0200 Subject: [PATCH 04/25] Add files via upload Upload notebook and sample data --- gp_referral_toolkit/patient_note.txt | 17 ++++++++ gp_referral_toolkit/referral_letter_bot.py | 46 ++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 gp_referral_toolkit/patient_note.txt create mode 100644 gp_referral_toolkit/referral_letter_bot.py diff --git a/gp_referral_toolkit/patient_note.txt b/gp_referral_toolkit/patient_note.txt new file mode 100644 index 0000000..bef096a --- /dev/null +++ b/gp_referral_toolkit/patient_note.txt @@ -0,0 +1,17 @@ +45F, fatigue and weight gain. Reports cold intolerance and constipation. No palpitations. Family history of thyroid disease. + +--- + +56M, chest pain on exertion for 3 weeks. No SOB or nausea. Hypertension, diabetes. Family history of CAD. + +--- + +22F, recurrent UTIs. Sexually active. No fever or flank pain. Normal renal function. History of E. coli positive urine cultures. + +--- + +60M, progressive shortness of breath. Former smoker. Bilateral wheezing on auscultation. Awaiting spirometry. History of COPD. + +--- + +32F, persistent headaches. Worse with stress. Normal neuro exam. No aura. Family history of migraines. Normal MRI last year. diff --git a/gp_referral_toolkit/referral_letter_bot.py b/gp_referral_toolkit/referral_letter_bot.py new file mode 100644 index 0000000..4c6784d --- /dev/null +++ b/gp_referral_toolkit/referral_letter_bot.py @@ -0,0 +1,46 @@ +import openai + +# Step 1: Summarize the patient consultation note +def summarize_patient_note(note_text): + response = openai.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "user", "content": f"Please summarize the following patient consultation note in a clear, clinical style:\n\n{note_text}"} + ] + ) + return response.choices[0].message.content + +# Step 2: Generate a specialist referral letter +def generate_referral_letter(summary_text, specialist_type): + system_prompt = f"You are an experienced general practitioner. 
Based on the consultation summary, write a concise, professional referral letter to a {specialist_type}." + + response = openai.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"Consultation summary:\n\n{summary_text}"} + ] + ) + return response.choices[0].message.content + +# Main logic +if __name__ == "__main__": + try: + with open('patient_note.txt', 'r', encoding='utf-8') as file: + patient_note = file.read() + + # Step 1: Summarize the note + summary = summarize_patient_note(patient_note) + print("\n🩺 Consultation Summary:") + print(summary) + + # Step 2: Ask user which specialist to refer to + specialist = input("\n➡️ Which specialist is this referral for (e.g., cardiologist, neurologist)?\n") + + # Step 3: Generate the referral letter + referral_letter = generate_referral_letter(summary, specialist) + print("\n📨 Generated Referral Letter:\n") + print(referral_letter) + + except FileNotFoundError: + print("❌ The file 'patient_note.txt' was not found. Please ensure it exists in the project folder.") From 3ba4c0af91e9cc2d106a0c6e99539fb1fe1241c1 Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Sun, 8 Jun 2025 16:25:08 +0200 Subject: [PATCH 05/25] Rename gp_referral_toolkit/README.md to /community-contributions/gp_referral_toolkit/README.md --- .../{ => community-contributions/gp_referral_toolkit}/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename gp_referral_toolkit/{ => community-contributions/gp_referral_toolkit}/README.md (100%) diff --git a/gp_referral_toolkit/README.md b/gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md similarity index 100% rename from gp_referral_toolkit/README.md rename to gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md From 96dc3a9e1dbc6113c963952dd0d11c5f5b84a56e Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Sun, 8 Jun 2025 16:26:46 +0200 Subject: [PATCH 06/25] Update and rename gp_referral_toolkit/patient_note.txt to /communit-contributions/gp_referral_toolkit/patient_note.txt --- .../gp_referral_toolkit}/patient_note.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename gp_referral_toolkit/{ => communit-contributions/gp_referral_toolkit}/patient_note.txt (100%) diff --git a/gp_referral_toolkit/patient_note.txt b/gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt similarity index 100% rename from gp_referral_toolkit/patient_note.txt rename to gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt From 066d6c552f96a34549e998c493301c834483c723 Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Sun, 8 Jun 2025 16:28:37 +0200 Subject: [PATCH 07/25] Delete gp_referral_toolkit/communit-contributions/gp_referral_toolkit directory --- .../gp_referral_toolkit/patient_note.txt | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt diff --git a/gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt b/gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt deleted file mode 100644 index bef096a..0000000 --- a/gp_referral_toolkit/communit-contributions/gp_referral_toolkit/patient_note.txt +++ /dev/null @@ -1,17 +0,0 @@ -45F, fatigue and weight gain. Reports cold intolerance and constipation. No palpitations. Family history of thyroid disease. - ---- - -56M, chest pain on exertion for 3 weeks. No SOB or nausea. 
Hypertension, diabetes. Family history of CAD. - ---- - -22F, recurrent UTIs. Sexually active. No fever or flank pain. Normal renal function. History of E. coli positive urine cultures. - ---- - -60M, progressive shortness of breath. Former smoker. Bilateral wheezing on auscultation. Awaiting spirometry. History of COPD. - ---- - -32F, persistent headaches. Worse with stress. Normal neuro exam. No aura. Family history of migraines. Normal MRI last year. From 3c9d1fecfcea7b8a371202ea8c368fb1ce281db5 Mon Sep 17 00:00:00 2001 From: SUKIHEALTH Date: Sun, 8 Jun 2025 16:28:53 +0200 Subject: [PATCH 08/25] Delete gp_referral_toolkit directory --- .../gp_referral_toolkit/README.md | 1 - gp_referral_toolkit/referral_letter_bot.py | 46 ------------------- 2 files changed, 47 deletions(-) delete mode 100644 gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md delete mode 100644 gp_referral_toolkit/referral_letter_bot.py diff --git a/gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md b/gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md deleted file mode 100644 index 8b13789..0000000 --- a/gp_referral_toolkit/community-contributions/gp_referral_toolkit/README.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/gp_referral_toolkit/referral_letter_bot.py b/gp_referral_toolkit/referral_letter_bot.py deleted file mode 100644 index 4c6784d..0000000 --- a/gp_referral_toolkit/referral_letter_bot.py +++ /dev/null @@ -1,46 +0,0 @@ -import openai - -# Step 1: Summarize the patient consultation note -def summarize_patient_note(note_text): - response = openai.chat.completions.create( - model="gpt-4o-mini", - messages=[ - {"role": "user", "content": f"Please summarize the following patient consultation note in a clear, clinical style:\n\n{note_text}"} - ] - ) - return response.choices[0].message.content - -# Step 2: Generate a specialist referral letter -def generate_referral_letter(summary_text, specialist_type): - system_prompt = f"You are an experienced general practitioner. Based on the consultation summary, write a concise, professional referral letter to a {specialist_type}." - - response = openai.chat.completions.create( - model="gpt-4o-mini", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": f"Consultation summary:\n\n{summary_text}"} - ] - ) - return response.choices[0].message.content - -# Main logic -if __name__ == "__main__": - try: - with open('patient_note.txt', 'r', encoding='utf-8') as file: - patient_note = file.read() - - # Step 1: Summarize the note - summary = summarize_patient_note(patient_note) - print("\n🩺 Consultation Summary:") - print(summary) - - # Step 2: Ask user which specialist to refer to - specialist = input("\n➡️ Which specialist is this referral for (e.g., cardiologist, neurologist)?\n") - - # Step 3: Generate the referral letter - referral_letter = generate_referral_letter(summary, specialist) - print("\n📨 Generated Referral Letter:\n") - print(referral_letter) - - except FileNotFoundError: - print("❌ The file 'patient_note.txt' was not found. 
Please ensure it exists in the project folder.") From 1ab4b2ceb30200e0fee30bbfbd757946208570cc Mon Sep 17 00:00:00 2001 From: Sabine Fonderson | CEO Date: Sun, 22 Jun 2025 14:16:21 +0200 Subject: [PATCH 09/25] create folder sf-patient-brochure --- community-contributions/sf-patient-brochure/.gitkeep | 1 + 1 file changed, 1 insertion(+) create mode 100644 community-contributions/sf-patient-brochure/.gitkeep diff --git a/community-contributions/sf-patient-brochure/.gitkeep b/community-contributions/sf-patient-brochure/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/community-contributions/sf-patient-brochure/.gitkeep @@ -0,0 +1 @@ + From 72432fdf54be520d68b1e1f20e43212725256ca0 Mon Sep 17 00:00:00 2001 From: Sabine Fonderson | CEO Date: Sun, 22 Jun 2025 14:17:13 +0200 Subject: [PATCH 10/25] add patient brochure notebook and summaries --- .../Patient brochure.ipynb | 517 ++++++++++++++++++ .../brochure_summaries.txt | 40 ++ 2 files changed, 557 insertions(+) create mode 100644 community-contributions/sf-patient-brochure/Patient brochure.ipynb create mode 100644 community-contributions/sf-patient-brochure/brochure_summaries.txt diff --git a/community-contributions/sf-patient-brochure/Patient brochure.ipynb b/community-contributions/sf-patient-brochure/Patient brochure.ipynb new file mode 100644 index 0000000..4f6bc85 --- /dev/null +++ b/community-contributions/sf-patient-brochure/Patient brochure.ipynb @@ -0,0 +1,517 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "id": "fc57c47f-31fc-4527-af71-ce117d35c480", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n", + "\n", + "import os\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d74ea4e7-7d4a-4c85-92d3-8cdb231bc261", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3eb884ea-02db-4ff8-91f9-c71e40b1cf4a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key looks good so far\n" + ] + } + ], + "source": [ + "# Initialize and constants\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n", + " print(\"API key looks good so far\")\n", + "else:\n", + " print(\"There might be a problem with your API key? 
Please visit the troubleshooting notebook!\")\n", + " \n", + "MODEL = 'gpt-4o-mini'\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d48a7b9b-273d-4bc9-997b-c7112e02528c", + "metadata": {}, + "outputs": [], + "source": [ + "# A class to represent a Webpage\n", + "\n", + "# Some websites need you to use proper headers when fetching them:\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + "\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + "\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bf51ae6e-91ae-46eb-ac39-dc860454ea4a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_condition_links_from_topics_page():\n", + " topics_url = \"https://www.thuisarts.nl/overzicht/onderwerpen\"\n", + " response = requests.get(topics_url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + "\n", + " # Find all tags that look like condition pages\n", + " links = soup.find_all(\"a\", href=True)\n", + " condition_links = []\n", + "\n", + " for link in links:\n", + " href = link['href']\n", + " if href.startswith(\"/\"):\n", + " href = \"https://www.thuisarts.nl\" + href\n", + " if href.startswith(\"https://www.thuisarts.nl/\") and len(href.split(\"/\")) > 3:\n", + " condition_links.append(href)\n", + "\n", + " # Remove duplicates and return\n", + " return list(set(condition_links))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a246ac9f-73fb-4c2d-ab92-6f3f2bf7afac", + "metadata": {}, + "outputs": [], + "source": [ + "link_system_prompt = \"\"\"You are an assistant that filters URLs for patient education content. 
\n", + "\n", + "Only return links that lead to pages about symptoms, health conditions, treatments, or diseases — for example: pages on 'headache', 'diarrhea', 'stomach pain', 'asthma', etc.\n", + "\n", + "DO NOT return:\n", + "- contact pages\n", + "- overview/video/image/keuzekaart lists unless they directly link to medical complaints\n", + "- navigation or privacy/cookie/social media links\n", + "\n", + "Respond only with full https links in JSON format, like this:\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"symptom or condition page\", \"url\": \"https://www.thuisarts.nl/hoofdpijn\"},\n", + " {\"type\": \"symptom or condition page\", \"url\": \"https://www.thuisarts.nl/buikpijn\"}\n", + " ]\n", + "}\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b3ac761e-f583-479e-b8ef-70e70f8f361a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You are an assistant that filters URLs for patient education content. \n", + "\n", + "Only return links that lead to pages about symptoms, health conditions, treatments, or diseases — for example: pages on 'headache', 'diarrhea', 'stomach pain', 'asthma', etc.\n", + "\n", + "DO NOT return:\n", + "- contact pages\n", + "- overview/video/image/keuzekaart lists unless they directly link to medical complaints\n", + "- navigation or privacy/cookie/social media links\n", + "\n", + "Respond only with full https links in JSON format, like this:\n", + "{\n", + " \"links\": [\n", + " {\"type\": \"symptom or condition page\", \"url\": \"https://www.thuisarts.nl/hoofdpijn\"},\n", + " {\"type\": \"symptom or condition page\", \"url\": \"https://www.thuisarts.nl/buikpijn\"}\n", + " ]\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "print(link_system_prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "5548e8d4-2813-40fe-a807-cf3661d3a0a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Found 680 condition pages.\n" + ] + } + ], + "source": [ + "condition_links = get_condition_links_from_topics_page()\n", + "print(f\"✅ Found {len(condition_links)} condition pages.\")\n", + "\n", + "# Format for summary function\n", + "selected_links = [{\"url\": link} for link in condition_links]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8d264592-8b77-425a-be4a-73ef7d32d744", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "def load_existing_summaries(filepath=\"brochure_cache.json\"):\n", + " if os.path.exists(filepath):\n", + " with open(filepath, \"r\", encoding=\"utf-8\") as f:\n", + " return json.load(f)\n", + " return {}\n", + "\n", + "def save_summaries_to_cache(summaries, filepath=\"brochure_cache.json\"):\n", + " with open(filepath, \"w\", encoding=\"utf-8\") as f:\n", + " json.dump(summaries, f, indent=2, ensure_ascii=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1cdd9456-1262-40a0-bc3f-28d23010ed7f", + "metadata": {}, + "outputs": [], + "source": [ + "selected_links = [{\"url\": link} for link in get_condition_links_from_topics_page()][:10]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0c2f24ea-fa6b-4431-849a-e1aeaa936022", + "metadata": {}, + "outputs": [], + "source": [ + "summary_cache = {}\n", + "\n", + "def summarize_for_brochure(url):\n", + " if url in summary_cache:\n", + " summary = summary_cache[url]\n", + " print(f\"✅ [Cached] {url}\")\n", + " print(f\"📄 
Summary:\\n{summary}\\n\") # 👈 this prints the cached summary too\n", + " return summary\n", + "\n", + " page = Website(url)\n", + "\n", + " example = \"\"\"\n", + "Example:\n", + "\n", + "Title: Keelpijn \n", + "Summary: Sore throat is a common symptom, often caused by a virus. It usually goes away on its own within a few days. Drink warm fluids, rest your voice, and take paracetamol if needed. See a doctor if the pain lasts more than a week or gets worse.\n", + "\n", + "Title: Hoofdpijn \n", + "Summary: Headaches can have many causes like stress, fatigue, or dehydration. Most are harmless and go away with rest and fluids. Painkillers like paracetamol can help. If headaches are severe, frequent, or different than usual, contact your GP.\n", + "\"\"\"\n", + "\n", + " prompt = f\"\"\"\n", + "You are a health writer. Based on the Dutch content below, write a clear, short, brochure-style summary in **English** for patients.\n", + "\n", + "Use the format: \n", + "Title: {page.title} \n", + "Summary: \n", + "\n", + "Keep it under 100 words, easy to read, friendly, and medically accurate.\n", + "\n", + "{example}\n", + "\n", + "Now use this for:\n", + "Title: {page.title}\n", + "Content:\n", + "{page.text[:3000]}\n", + "\"\"\"\n", + "\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " temperature=0.4\n", + " )\n", + "\n", + " summary = response.choices[0].message.content.strip()\n", + " summary_cache[url] = summary\n", + " return summary\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "af8f9d81-d848-4fb9-ac79-782b39fed4a2", + "metadata": {}, + "outputs": [], + "source": [ + "def build_symptom_brochure(links, cache_file=\"brochure_cache.json\"):\n", + " brochure = []\n", + " cached = load_existing_summaries(cache_file)\n", + " print(\"📄 Building summaries for brochure:\\n\")\n", + "\n", + " for i, item in enumerate(links, 1):\n", + " url = item[\"url\"]\n", + " if url in cached:\n", + " print(f\"✅ [Cached] {url}\")\n", + " brochure.append({\"url\": url, \"summary\": cached[url]})\n", + " continue\n", + " \n", + " print(f\"🔄 [{i}/{len(links)}] Summarizing: {url}\")\n", + " try:\n", + " summary = summarize_for_brochure(url)\n", + " print(f\"✅ Summary:\\n{summary}\\n\")\n", + " brochure.append({\"url\": url, \"summary\": summary})\n", + " cached[url] = summary # Save new summary\n", + " save_summaries_to_cache(cached, cache_file)\n", + " except Exception as e:\n", + " print(f\"❌ Error summarizing {url}: {e}\\n\")\n", + " brochure.append({\"url\": url, \"summary\": \"Error generating summary.\"})\n", + "\n", + " return brochure\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e9079d6b-538f-4681-9776-4628a111246a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📄 Building summaries for brochure:\n", + "\n", + "🔄 [1/10] Summarizing: https://www.thuisarts.nl/sociale-angststoornis\n", + "✅ [New] https://www.thuisarts.nl/sociale-angststoornis\n", + "📄 Summary:\n", + "Title: Social Anxiety Disorder\n", + "Summary: Social anxiety disorder, or social phobia, is a fear of what others think of you, often leading to panic attacks. Writing down what happens, your thoughts, and feelings can help manage this fear. Positive thinking can also be beneficial when you're feeling anxious. Discussing your concerns with your GP or practice nurse can be helpful. 
If there's no improvement or symptoms are severe, treatments such as therapy with a psychologist or anxiety medication may be considered.\n", + "\n", + "✅ Summary:\n", + "Title: Social Anxiety Disorder\n", + "Summary: Social anxiety disorder, or social phobia, is a fear of what others think of you, often leading to panic attacks. Writing down what happens, your thoughts, and feelings can help manage this fear. Positive thinking can also be beneficial when you're feeling anxious. Discussing your concerns with your GP or practice nurse can be helpful. If there's no improvement or symptoms are severe, treatments such as therapy with a psychologist or anxiety medication may be considered.\n", + "\n", + "✅ [Cached] https://www.thuisarts.nl/diabetes-type-2\n", + "🔄 [3/10] Summarizing: https://www.thuisarts.nl/morton-neuroom\n", + "✅ [New] https://www.thuisarts.nl/morton-neuroom\n", + "📄 Summary:\n", + "Title: Morton's Neuroma | Thuisarts.nl \n", + "Summary: Morton's Neuroma is a pinched nerve in the forefoot, causing burning pain in the forefoot and toes. It often results from wearing too narrow shoes or high heels. Wearing comfortable, roomy shoes can help alleviate symptoms. For severe pain, paracetamol can be taken. Sometimes, a custom shoe insole can also help.\n", + "\n", + "✅ Summary:\n", + "Title: Morton's Neuroma | Thuisarts.nl \n", + "Summary: Morton's Neuroma is a pinched nerve in the forefoot, causing burning pain in the forefoot and toes. It often results from wearing too narrow shoes or high heels. Wearing comfortable, roomy shoes can help alleviate symptoms. For severe pain, paracetamol can be taken. Sometimes, a custom shoe insole can also help.\n", + "\n", + "🔄 [4/10] Summarizing: https://www.thuisarts.nl/borstvergroting\n", + "✅ [New] https://www.thuisarts.nl/borstvergroting\n", + "📄 Summary:\n", + "Title: Breast Augmentation | Thuisarts.nl \n", + "Summary: A breast augmentation is a procedure where a plastic surgeon inserts fillings into your breasts, under general anesthesia. The surgery takes about an hour. Consider the pros and cons carefully. Benefits may include a more positive body image and increased self-confidence. Risks may include infection, bleeding, scarring, or hardening of the breasts over time. Often, a follow-up surgery is needed later. If you smoke, it's important to quit three weeks before surgery.\n", + "\n", + "✅ Summary:\n", + "Title: Breast Augmentation | Thuisarts.nl \n", + "Summary: A breast augmentation is a procedure where a plastic surgeon inserts fillings into your breasts, under general anesthesia. The surgery takes about an hour. Consider the pros and cons carefully. Benefits may include a more positive body image and increased self-confidence. Risks may include infection, bleeding, scarring, or hardening of the breasts over time. Often, a follow-up surgery is needed later. If you smoke, it's important to quit three weeks before surgery.\n", + "\n", + "🔄 [5/10] Summarizing: https://www.thuisarts.nl/kijkoperatie-in-buik\n", + "✅ [New] https://www.thuisarts.nl/kijkoperatie-in-buik\n", + "📄 Summary:\n", + "Title: Abdominal Laparoscopy | Thuisarts.nl\n", + "Summary: An abdominal laparoscopy allows the doctor to examine or operate in your abdomen. Small tubes with a camera and tools are inserted through tiny incisions. You'll have a pre-operation discussion with your surgeon and anesthesiologist. You will be deeply sedated for the procedure. You cannot drive home post-operation, so arrange for someone to pick you up. 
Recovery usually requires a week off work, sometimes longer.\n", + "\n", + "✅ Summary:\n", + "Title: Abdominal Laparoscopy | Thuisarts.nl\n", + "Summary: An abdominal laparoscopy allows the doctor to examine or operate in your abdomen. Small tubes with a camera and tools are inserted through tiny incisions. You'll have a pre-operation discussion with your surgeon and anesthesiologist. You will be deeply sedated for the procedure. You cannot drive home post-operation, so arrange for someone to pick you up. Recovery usually requires a week off work, sometimes longer.\n", + "\n", + "🔄 [6/10] Summarizing: https://www.thuisarts.nl/veranderingen-in-zorg-als-je-18-wordt\n", + "✅ [New] https://www.thuisarts.nl/veranderingen-in-zorg-als-je-18-wordt\n", + "📄 Summary:\n", + "Title: Changes in Care When You Turn 18 | Thuisarts.nl\n", + "Summary: As you become an adult, usually around 18, you transition from child to adult healthcare. You will start to take more responsibility, such as making appointments and requesting medications, giving you more control over your care. You will create a plan detailing what you need to manage this independently, with support provided to help you. This transition is a gradual process, with preparation beginning before you turn 18.\n", + "\n", + "✅ Summary:\n", + "Title: Changes in Care When You Turn 18 | Thuisarts.nl\n", + "Summary: As you become an adult, usually around 18, you transition from child to adult healthcare. You will start to take more responsibility, such as making appointments and requesting medications, giving you more control over your care. You will create a plan detailing what you need to manage this independently, with support provided to help you. This transition is a gradual process, with preparation beginning before you turn 18.\n", + "\n", + "🔄 [7/10] Summarizing: https://www.thuisarts.nl/zon-en-zonnebrand\n", + "✅ [New] https://www.thuisarts.nl/zon-en-zonnebrand\n", + "📄 Summary:\n", + "Title: Sun and Sunburn | Thuisarts.nl\n", + "Summary: Protect your skin from excessive sunlight to avoid sunburn. If you notice your skin burning, immediately move out of the sun. Cool your skin with wet cloths if it hurts and take paracetamol for severe pain. Stay out of the sun for at least three days to allow your skin to recover. If you have symptoms of sunstroke, sun allergy, or eczema, seek medical advice.\n", + "\n", + "✅ Summary:\n", + "Title: Sun and Sunburn | Thuisarts.nl\n", + "Summary: Protect your skin from excessive sunlight to avoid sunburn. If you notice your skin burning, immediately move out of the sun. Cool your skin with wet cloths if it hurts and take paracetamol for severe pain. Stay out of the sun for at least three days to allow your skin to recover. If you have symptoms of sunstroke, sun allergy, or eczema, seek medical advice.\n", + "\n", + "🔄 [8/10] Summarizing: https://www.thuisarts.nl/ganglion\n", + "✅ [New] https://www.thuisarts.nl/ganglion\n", + "📄 Summary:\n", + "Title: Ganglion | Thuisarts.nl \n", + "Summary: A ganglion is a small bump that can appear on your wrist, finger, or foot. It is a protrusion from the joint and is harmless. In half of the cases, a ganglion disappears on its own. If you notice such a bump, there is usually no cause for concern.\n", + "\n", + "✅ Summary:\n", + "Title: Ganglion | Thuisarts.nl \n", + "Summary: A ganglion is a small bump that can appear on your wrist, finger, or foot. It is a protrusion from the joint and is harmless. In half of the cases, a ganglion disappears on its own. 
If you notice such a bump, there is usually no cause for concern.\n", + "\n", + "🔄 [9/10] Summarizing: https://www.thuisarts.nl/kunstheup\n", + "✅ [New] https://www.thuisarts.nl/kunstheup\n", + "📄 Summary:\n", + "Title: Hip Replacement | Thuisarts.nl\n", + "Summary: A hip replacement can be an option if you are experiencing severe pain or stiffness in your hip, such as from advanced arthritis or another hip disease. This is usually considered when other treatments like physiotherapy and painkillers have not provided enough relief. You can discuss with your hospital doctor whether a hip replacement is suitable for you. A hip prosthesis typically lasts longer than 20 years.\n", + "\n", + "✅ Summary:\n", + "Title: Hip Replacement | Thuisarts.nl\n", + "Summary: A hip replacement can be an option if you are experiencing severe pain or stiffness in your hip, such as from advanced arthritis or another hip disease. This is usually considered when other treatments like physiotherapy and painkillers have not provided enough relief. You can discuss with your hospital doctor whether a hip replacement is suitable for you. A hip prosthesis typically lasts longer than 20 years.\n", + "\n", + "🔄 [10/10] Summarizing: https://www.thuisarts.nl/gezond-leven\n", + "✅ [New] https://www.thuisarts.nl/gezond-leven\n", + "📄 Summary:\n", + "Title: Healthy Living | Thuisarts.nl\n", + "Summary: For good health, it's important to eat, drink, and sleep well, stay active, relax, and maintain social contacts. Avoiding substances like alcohol is also beneficial. If you want to make changes to your lifestyle, take it step by step. Discuss your plans with your GP or practice nurse. Whether it's about healthy eating, exercise, sleep, stress management, social contact, or substance use, they can provide guidance and support.\n", + "\n", + "✅ Summary:\n", + "Title: Healthy Living | Thuisarts.nl\n", + "Summary: For good health, it's important to eat, drink, and sleep well, stay active, relax, and maintain social contacts. Avoiding substances like alcohol is also beneficial. If you want to make changes to your lifestyle, take it step by step. Discuss your plans with your GP or practice nurse. 
Whether it's about healthy eating, exercise, sleep, stress management, social contact, or substance use, they can provide guidance and support.\n", + "\n" + ] + } + ], + "source": [ + "brochure = build_symptom_brochure(selected_links)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e2121c3c-aa6a-4640-8e19-6ca6ccf84783", + "metadata": {}, + "outputs": [], + "source": [ + "def export_brochure_to_txt(brochure, filepath=\"brochure_summaries.txt\"):\n", + " if not brochure:\n", + " print(\"⚠️ No summaries to export.\")\n", + " return\n", + "\n", + " with open(filepath, \"w\", encoding=\"utf-8\") as f:\n", + " for item in brochure:\n", + " url = item.get(\"url\", \"Unknown URL\")\n", + " summary = item.get(\"summary\", \"No summary available.\")\n", + " f.write(f\"URL: {url}\\n\")\n", + " f.write(f\"{summary}\\n\\n\")\n", + "\n", + " print(f\"📁 Exported {len(brochure)} summaries to {filepath}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "f14288f9-4d1c-4a0e-aaf4-9f86324b0602", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📁 Exported 10 summaries to brochure_summaries.txt\n" + ] + } + ], + "source": [ + "export_brochure_to_txt(brochure)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c23e89db-3ded-4189-a227-6ca6ac2f1332", + "metadata": {}, + "outputs": [], + "source": [ + "###---it works---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a700e4f3-fb6a-499a-a579-6f9b8ad35c9f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/community-contributions/sf-patient-brochure/brochure_summaries.txt b/community-contributions/sf-patient-brochure/brochure_summaries.txt new file mode 100644 index 0000000..0ba4556 --- /dev/null +++ b/community-contributions/sf-patient-brochure/brochure_summaries.txt @@ -0,0 +1,40 @@ +URL: https://www.thuisarts.nl/sociale-angststoornis +Title: Social Anxiety Disorder +Summary: Social anxiety disorder, or social phobia, is a fear of what others think of you, often leading to panic attacks. Writing down what happens, your thoughts, and feelings can help manage this fear. Positive thinking can also be beneficial when you're feeling anxious. Discussing your concerns with your GP or practice nurse can be helpful. If there's no improvement or symptoms are severe, treatments such as therapy with a psychologist or anxiety medication may be considered. + +URL: https://www.thuisarts.nl/diabetes-type-2 +Title: Diabetes type 2 | Thuisarts.nl +Summary: Type 2 diabetes, also known as sugar disease, is characterized by high blood sugar levels. Leading a healthy lifestyle is crucial: eat healthily, lose weight, exercise regularly, relax, and quit smoking. If blood sugar levels remain high, medication may be required. Regular check-ups, usually every three months, with your GP or practice nurse are essential. 
+ +URL: https://www.thuisarts.nl/morton-neuroom +Title: Morton's Neuroma | Thuisarts.nl +Summary: Morton's Neuroma is a pinched nerve in the forefoot, causing burning pain in the forefoot and toes. It often results from wearing too narrow shoes or high heels. Wearing comfortable, roomy shoes can help alleviate symptoms. For severe pain, paracetamol can be taken. Sometimes, a custom shoe insole can also help. + +URL: https://www.thuisarts.nl/borstvergroting +Title: Breast Augmentation | Thuisarts.nl +Summary: A breast augmentation is a procedure where a plastic surgeon inserts fillings into your breasts, under general anesthesia. The surgery takes about an hour. Consider the pros and cons carefully. Benefits may include a more positive body image and increased self-confidence. Risks may include infection, bleeding, scarring, or hardening of the breasts over time. Often, a follow-up surgery is needed later. If you smoke, it's important to quit three weeks before surgery. + +URL: https://www.thuisarts.nl/kijkoperatie-in-buik +Title: Abdominal Laparoscopy | Thuisarts.nl +Summary: An abdominal laparoscopy allows the doctor to examine or operate in your abdomen. Small tubes with a camera and tools are inserted through tiny incisions. You'll have a pre-operation discussion with your surgeon and anesthesiologist. You will be deeply sedated for the procedure. You cannot drive home post-operation, so arrange for someone to pick you up. Recovery usually requires a week off work, sometimes longer. + +URL: https://www.thuisarts.nl/veranderingen-in-zorg-als-je-18-wordt +Title: Changes in Care When You Turn 18 | Thuisarts.nl +Summary: As you become an adult, usually around 18, you transition from child to adult healthcare. You will start to take more responsibility, such as making appointments and requesting medications, giving you more control over your care. You will create a plan detailing what you need to manage this independently, with support provided to help you. This transition is a gradual process, with preparation beginning before you turn 18. + +URL: https://www.thuisarts.nl/zon-en-zonnebrand +Title: Sun and Sunburn | Thuisarts.nl +Summary: Protect your skin from excessive sunlight to avoid sunburn. If you notice your skin burning, immediately move out of the sun. Cool your skin with wet cloths if it hurts and take paracetamol for severe pain. Stay out of the sun for at least three days to allow your skin to recover. If you have symptoms of sunstroke, sun allergy, or eczema, seek medical advice. + +URL: https://www.thuisarts.nl/ganglion +Title: Ganglion | Thuisarts.nl +Summary: A ganglion is a small bump that can appear on your wrist, finger, or foot. It is a protrusion from the joint and is harmless. In half of the cases, a ganglion disappears on its own. If you notice such a bump, there is usually no cause for concern. + +URL: https://www.thuisarts.nl/kunstheup +Title: Hip Replacement | Thuisarts.nl +Summary: A hip replacement can be an option if you are experiencing severe pain or stiffness in your hip, such as from advanced arthritis or another hip disease. This is usually considered when other treatments like physiotherapy and painkillers have not provided enough relief. You can discuss with your hospital doctor whether a hip replacement is suitable for you. A hip prosthesis typically lasts longer than 20 years. 
+ +URL: https://www.thuisarts.nl/gezond-leven +Title: Healthy Living | Thuisarts.nl +Summary: For good health, it's important to eat, drink, and sleep well, stay active, relax, and maintain social contacts. Avoiding substances like alcohol is also beneficial. If you want to make changes to your lifestyle, take it step by step. Discuss your plans with your GP or practice nurse. Whether it's about healthy eating, exercise, sleep, stress management, social contact, or substance use, they can provide guidance and support. + From b0e113481658ee65e5037e1f4bb27b6a5b33ec1f Mon Sep 17 00:00:00 2001 From: Vanshika-mahajan Date: Mon, 23 Jun 2025 22:42:08 +0530 Subject: [PATCH 11/25] Add fashion summarizer notebook using Ollama --- web_summary_fashion.ipynb | 933 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 933 insertions(+) create mode 100644 web_summary_fashion.ipynb diff --git a/web_summary_fashion.ipynb b/web_summary_fashion.ipynb new file mode 100644 index 0000000..bc0930c --- /dev/null +++ b/web_summary_fashion.ipynb @@ -0,0 +1,933 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 113, + "id": "030082e9-edee-40b6-9f17-b6a683f2e334", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "import bs4\n", + "from bs4 import BeautifulSoup\n", + "import lxml\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "c87e997d-e1d6-4b6f-9c76-3fb1d607f7cd", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "e450cb33-1ae4-435e-b155-35f2bd7ab78e", + "metadata": {}, + "outputs": [], + "source": [ + "headers={\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "} \n", + "#a dictionary named header so that we can grab same html code as the user ,and also to avoid blocks,captcha and error403" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "63a57fb7-79db-444b-968b-c9314b1f3d3f", + "metadata": {}, + "outputs": [], + "source": [ + "class Website:\n", + " def __init__(self,url):\n", + " self.url=url\n", + " response= requests.get(url,headers=headers,timeout=30)\n", + " soup=BeautifulSoup(response.content,'lxml')\n", + " self.title=soup.title.string if soup.title else \"No title found\"#scraping the content\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):#cleaning the content\n", + " irrelevant.decompose()\n", + " #using .get_text() method of Beautiful soup\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)#creating space between different lines and removing leading whitespaces by strip=true" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "id": "7369159d-1f36-43c9-b7e7-a0b65b56426b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latest and Trending Entertainment News, Celebrity News, Movie News, Breaking News | Entertainment - Times of India\n", + "Sign In\n", + "TOI\n", + "Go to\n", + "TOI\n", + "Etimes\n", + "home\n", + "cinema\n", + "news\n", + "movie reviews\n", + "movie listings\n", + "box office\n", + "anime\n", + "previews\n", + "did you know\n", + "videos\n", + "showtimes\n", + "blogs\n", + "awards\n", + "News\n", + 
"entertainment\n", + "Trending\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Sudhanshu: At 52, John, Dino all of them look like rockstars - EXCLUSIVE\n", + "Sudhanshu Pandey, recognized from 'Band Of Boys' and 'Anupama', defies his 50 years with his fitness. He credits his peers like Dino Moria, Arjun Rampal, and John Abraham for inspiring him to maintain a fit and youthful appearance. Pandey also admires Anil Kapoor's energy and dedication, motivating him to continue prioritizing fitness and inspiring others.\n", + "Previous\n", + "Sonakshi breaks silence on her rift with Luv and Kussh\n", + "Madhuri once chased Aamir with hockey stick for THIS reason\n", + "Ranbir-Raj Kapoor, Diljit-Hania, Samay-IGL: Top 5 news\n", + "Big B's savage reply to troll over cybercrime callertune\n", + "Anushka on keeping kids Vamika, Akaay away from public eye\n", + "Apoorva Mukhija recalls witnessing gender bias at home\n", + "Danish influencer seeks help to find papads from Big B\n", + "Sunjay Kapur's reception pics with Priya Sachdev goes viral\n", + "Big B schools trolls commenting 'buddha sathiya gaya hai'\n", + "Anushka on how she and Virat divide parenting duties\n", + "Brahmaji reacts to Vishnu's 7,000-acre land in New Zealand\n", + "Diljit says THIS amidst trolling for working with Hania\n", + "Riddhi found it ridiculous to like SRK's mother in Jawan\n", + "Priya Sachdev once called husband Sunjay Kapur ‘misunderstood’\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "Hindi\n", + "See All\n", + "Sudhanshu: At 52, John, Dino all of them look like rockstars - EXCLUSIVE\n", + "Sudhanshu Pandey, recognized from 'Band Of Boys' and 'Anupama', defies his 50 years with his fitness. He credits his peers like Dino Moria, Arjun Rampal, and John Abraham for inspiring him to maintain a fit and youthful appearance. Pandey also admires Anil Kapoor's energy and dedication, motivating him to continue prioritizing fitness and inspiring others.\n", + "Sonakshi breaks silence on her rift with Luv and Kussh\n", + "Madhuri once chased Aamir with hockey stick for THIS reason\n", + "Ranbir-Raj Kapoor, Diljit-Hania, Samay-IGL: Top 5 news\n", + "Anushka on keeping kids Vamika, Akaay away from public eye\n", + "Anushka Sharma and Virat Kohli are committed to shielding their children, Vamika and Akaay, from the constant glare of public attention. 
In a recent interview, Anushka emphasized the couple's focus on instilling strong values and ensuring a normal upbringing for their kids.\n", + "Apoorva Mukhija recalls witnessing gender bias at home\n", + "Regional\n", + "When Samantha’s class 10 mark sheet got leaked\n", + "Throwback to when a nostalgic memory made its way across the internet — Samantha Ruth Prabhu’s Class 10 mark sheet! The actress’s charming on-screen presence and grounded personality were once again in the spotlight as her old school report card began doing the rounds on social media.\n", + "Actor Tushar Ghadigaonkar passes away at 34\n", + "‘Kuberaa’ Twitter review: Netizens calls it a ‘Blockbuster’\n", + "Mammootty’s health- Brittas says actor doing well\n", + "Kavya Madhavan’s father P. Madhavan passes away\n", + "‘The Raja Saab’ teaser: Prabhas shines in this horror comedy\n", + "Mammootty’s father-in-law P S Abu passes away\n", + "Videos\n", + "See All\n", + "Previous\n", + "03:07\n", + "Ananya Panday’s Garden Bond With Parrots Wins Hearts\n", + "88 views | 2 hours ago\n", + "03:14\n", + "Sameera Reddy’s Healing Journey Through Yoga\n", + "31 views | 2 hours ago\n", + "03:13\n", + "Kriti Kharbanda’s Modern Maharani Look Stuns Instagram\n", + "26 views | 2 hours ago\n", + "03:12\n", + "Bobby Deol Meets Diljit Dosanjh: Punjabi Power Goes Viral\n", + "81 views | 2 hours ago\n", + "03:19\n", + "‘Sitaare Zameen Par’: Riteish Deshmukh’s Emotional Shoutout For Genelia’s Big Win\n", + "162 views | 2 hours ago\n", + "03:26\n", + "Varun Dhawan Stuns With 50 Push-Ups Alongside Army Cadets on Border 2 Set\n", + "21 views | 2 hours ago\n", + "03:00\n", + "VIDYA BALAN TURNS HEADS WITH CASUAL AIRPORT LOOK\n", + "16 views | 2 hours ago\n", + "03:05\n", + "MANDHIRA KAPUR BREAKS DOWN IN EMOTIONAL POST FOR LATE BROTHER SUNJAY KAPUR\n", + "1.2K views | 2 hours ago\n", + "03:28\n", + "SALMAN KHAN TAKES A BRUTAL DIG AT SOHAIL’S DIVORCE ON NATIONAL TV\n", + "185 views | 2 hours ago\n", + "03:15\n", + "RAJINIKANTH CAUSES FAN RIOT DURING ‘JAILER 2’ SHOOT IN MYSORE\n", + "26 views | 2 hours ago\n", + "03:10\n", + "IBRAHIM ALI KHAN KISSES HIS DOG AT AIRPORT IN HEARTWARMING FAREWELL\n", + "20 views | 3 hours ago\n", + "03:09\n", + "ANUPAMAA SET GUTTED IN MASSIVE FIRE | CREW ESCAPES, CINE BODY DEMANDS ACTION\n", + "1.2K views | 3 hours ago\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "10\n", + "11\n", + "World\n", + "See All\n", + "Aamir to Tom: Celebs on a mission to 'Save Cinema'\n", + "'How to Train Your Dragon' beats '28 Years Later' and 'Elio' to top the US box office on second weekend\n", + "Blake Lively is heartbroken after friendship ends with Taylor Swift; accepts the music mogul won't be returning - Deets inside\n", + "Selena-Hailey UNFOLLOW each other amid Bieber drama\n", + "Judge gives Baldoni access to Blake-Taylor messages\n", + "Trending Now\n", + "# Sidharth Malhotra-Kiara Advani\n", + "# AbRam Khan-Taimur Ali Khan\n", + "# Janhvi Kapoor\n", + "# Salman Khan\n", + "# Hema Malini\n", + "# Salman Khan\n", + "# Gauri Khan\n", + "# Shah Rukh Khan\n", + "# Chahatt Khanna\n", + "Visual Stories\n", + "See All\n", + "Previous\n", + "Kuberaa’s Sameera to Pushpa’s Srivalli: Rashmika Mandanna’s most iconic on-screen avatars\n", + "Ahaana Krishna’s ethereal photo series is straight out of a dream\n", + "Rashmika Mandanna to Rakul Preet Singh: Best pictures of the week featuring south actresses\n", + "Gauri Khan's most loved saree looks - An ode to modern day elegance\n", + "​South 
Indian beauties whose smiles will light up your Monday\n", + "Karishma Tanna Slays Every Frame\n", + "Tamannaah Bhatia’s traditional looks\n", + "Malavika Mohanan's radiant pics\n", + "​Neha Shetty stuns in every shade of blue\n", + "Thalapathy Vijay’s top 10 blockbuster movies worth re-watching!\n", + "​In pic: Mesmerizing looks of Shruti Haasan​\n", + "Dushara Vijayan’s Most Elegant Fashion Moments\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "More Stories\n", + "Sonakshi Sinha breaks silence on her rumoured rift with brothers Luv and Kussh Sinha: 'My effort is always to support them...'\n", + "Madhuri Dixit once chased Aamir Khan with a hockey stick for THIS reason on sets of Dil: 'People fool you and you believe them'\n", + "Mohanlal declines to continue as president at AMMA’s general body meeting- Deets Inside\n", + "Blockbusters Ranbir Kapoor turned down: Films that became hits without him\n", + "Anushka Sharma reveals why she and Virat Kohli are keeping their children Vamika and Akaay away from the public eye: 'We don't want to raise brats'\n", + "Apoorva Mukhija recalls witnessing gender bias at home: 'My mother did it all, but father got credit for showing up at PTMs'\n", + "Amitabh Bachchan gives a savage reply to a troll over his viral cybercrime caller tune: 'Sarkar ko bolo bhai..'\n", + "Danish influencer asks fans to help her find papads from Amitabh Bachchan; netizens say 'he also used to grow basmati rice'\n", + "Days after his untimely demise, Sunjay Kapur's reception photos with Priya Sachdev goes viral; Looked dashing in hand embroidered shoes, written 'I do'\n", + "Priyanka Chopra Jonas recollects walking into a trap set by John Cena, Idris Elba on sets of 'Heads of State'\n", + "Bobby Deol's London vacation sparks fan frenzy: viral video shows actor posing for selfies outside restaurant\n", + "Amitabh Bachchah gives befitting replies to 'buddha sathiya gaya hai', ‘ganja’ comments by trolls: 'Ek din, Bhagwan naa kare voh din jaldi aaye...'\n", + "Sai Pallavi’s best performances\n", + "Brahmaji clears the air about Vishnu Manchu purchasing 7,000-acre land in New Zealand: 'I was pulling their leg as usual...'\n", + "Anushka Sharma reveals how she and Virat Kohli divide the parenting duties: 'I will be the primary caregiver, he plays round the year'\n", + "Ranbir Kapoor's 'Awara' look sparks rumours of Raj Kapoor tribute, Diljit Dosanjh slammed for working with Hania Aamir in Sardaar Ji 3: Top 5 news\n", + "Has Kiara Advani been approached to play Meena Kumari in her biopic? 
Here's what we know\n", + "Top 5 psychological Anime every thriller fan must watch\n", + "Load More Stories\n", + "# Latest Movies 2025\n", + "# Best Bollywood Movies 2025\n", + "# Hollywood Movie 2025\n", + "# Tamil Movies 2025\n", + "# Telugu Movies 2025\n", + "# Malayalam Movies 2025\n", + "# Kannada Movies 2025\n", + "# Marathi Movies 2025\n", + "# Bengali Movies 2025\n", + "# Top Rated Movies 2025\n", + "# Best Hindi Movies\n", + "# Best English Movies\n", + "Hot on the Web\n", + "Salman Khan\n", + "Karisma Kapoor\n", + "Jaideep Ahlawat\n", + "Blood Pressure\n", + "Big Cat Species\n", + "Trisha\n", + "Sitaare Zameen Par Review\n", + "Ancient Indigenous Tribes\n", + "Hair Growth Tips\n", + "Kidney Health\n", + "Kuberaa Review\n", + "Blake Lively\n", + "Reverse Fatty Liver\n", + "Skincare Hacks\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Baby Girl Names\n", + "Diljit Dosanjh\n", + "Kidney Disease Symptoms\n", + "Javed Akhtar\n", + "Heart Attack\n", + "Ram Kapoor Diet\n", + "Liver Damage\n", + "Kuberaa Movie Review\n", + "Gauri Khan\n", + "Baba Vanga Prediction\n", + "Baby Boy Names\n", + "Navjot Singh Sidhu\n", + "Housefull 5 Box Office Collection\n", + "DNA Movie Review\n", + "Kidney Damage Symptoms\n", + "Popular Waterfalls In India\n", + "Linkedin Ceo On AI Killing Jobs\n", + "Tesla Robotaxi\n", + "Early Cancer Detection\n", + "Harvard Research Reveals\n", + "American Destinations Explore Without Passport\n", + "Amouranth\n", + "Mouth Larvae\n", + "Doomsday Fish\n", + "Salman Khan AVM\n", + "Ginger Health Tips\n", + "Trending Topics\n", + "Latest Movies\n", + "Bollywood Movies\n", + "Hollywood Movies\n", + "Tamil Movies 2025\n", + "Telugu Movies 2025\n", + "Malayalam Movies 2025\n", + "Kannada Movies 2025\n", + "Marathi Movies 2025\n", + "Bengali Movies 2025\n", + "Top Rated Movies 2025\n", + "Best Hindi Movies\n", + "Best English Movies\n", + "Best Telugu Movies\n", + "Best Tamil Movies\n", + "Best Malayalam Movies\n", + "Best Kannada Movies\n", + "Best Bengali Movies\n", + "Upcoming Hindi Movies\n", + "Best Movies Of All Time\n", + "Best Hindi Movies of All Time\n", + "Latest English Movies\n", + "Latest Malayalam Movies\n", + "English TV News\n", + "Tamil TV News\n", + "Telugu TV News\n", + "Malayalam TV News\n", + "Kannada TV News\n", + "Movie Reviews\n", + "Bhojpuri Cinema News\n", + "Gujarati Cinema News\n", + "Popular Categories\n", + "Viral News\n", + "K Pop News\n", + "Web Series News\n", + "Anime News\n", + "Upcoming English Movies\n", + "Upcoming Tamil Movies\n", + "Upcoming Telugu Movies\n", + "Upcoming Malayalam Movies\n", + "Upcoming Kannada Movies\n", + "Fashion Tips\n", + "Travel News\n", + "Entertainment News\n", + "Bollywood News\n", + "Tollywood News\n", + "Kollywood News\n", + "Mollywood News\n", + "Food News\n", + "Latest Hindi Movies\n", + "Latest Tamil Movies\n", + "Parenting Tips\n", + "Home Remedies\n", + "Weight Loss\n", + "Beauty Tips\n", + "Parenting Tips\n", + "Hindi Videos\n", + "Hindi Video Songs\n", + "Bhojpuri Music Videos\n", + "Latest Telugu Movies\n", + "Bhojpuri Music Video\n", + "Hindi TV News\n", + "Latest News\n", + "NHL free agency turns spicy as Mitch Marner and Connor McDavid eye shorter deals to cash in later\n", + "Olive Ridley turtle washed ashore at Polem\n", + "Who is Thomas Fugate? 
Meet the 22-year-old leading Trump's terrorism unit amid Iran fiasco\n", + "'And that's why Putin's the boss': Trump rebukes former Russian President Medvedev; warns against treating 'N word casually'\n", + "Govt plans ₹10cr road on Bicholim-Dodamarg route\n", + "Former WWE star Batista eyed for Road House 2 sequel\n", + "Sonakshi Sinha breaks silence on her rumoured rift with brothers Luv and Kussh Sinha: 'My effort is always to support them...'\n", + "Andre Agassi and Steffi Graf’s son Jaden Agassi shows love for girlfriend Catherine Holt’s bold new photo from bedroom series\n", + "Is WWE planning to change Cody Rhodes’ iconic entrance theme song ‘Kingdom’?\n", + "Velumani says he didn’t attend RSS event in Coimbatore\n", + "Strait of Hormuz: Oil supply not an issue for India; 'pricing is a bigger concern,' what experts say\n", + "Madhuri Dixit once chased Aamir Khan with a hockey stick for THIS reason on sets of Dil: 'People fool you and you believe them'\n", + "As commissions fall, India’s ride-hailing firms test viability of flat-fee economics\n", + "Analysing what Trump’s strikes mean for Iran\n", + "Trump's clarification on 'Iran regime change' divides MAGA further: JD Vance, Hegseth, Marco Rubio 'humiliated'\n", + "Laughter Chefs 2: Krushna Abhishek roasts Rahul Vaidya for his in-famous feud with cricketer Virat Kohli\n", + "“I could have passed Dan Ticktum”: Edoardo Mortara regrets Attack Mode strategy at Jakarta E-Prix\n", + "India vs England Test: Sunil Gavaskar calls for Rishabh Pant's signature somersault celebration, wicketkeeper politely declines - WATCH\n", + "Copyright © 2025 Bennett, Coleman & Co. Ltd. All rights reserved. For reprint rights: Times Syndication Service\n", + "Follow us on\n" + ] + } + ], + "source": [ + "gossip= Website(\"https://timesofindia.indiatimes.com/entertainment\")\n", + "print(gossip.title)\n", + "print(gossip.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "a6f30380-1b91-48e4-9c86-df0369e2e675", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"\"\"\n", + "You are a stylish and culturally aware assistant who specializes in summarizing and discussing fashion trends, celebrity style, entertainment news, and television gossip.\n", + "\n", + "You stay updated on Hollywood, Bollywood, and the television world—including celebrity rumors, drama, reality TV updates, show recaps, and behind-the-scenes stories.\n", + "\n", + "When summarizing content, be engaging, concise, and insightful. Focus on what's trending, who's wearing what, and what everyone is talking about in fashion and entertainment. 
Maintain a fun yet informative tone, like a pop culture expert writing for a lifestyle magazine.\n", + "\n", + "If content includes TV gossip, highlight key rumors, casting updates, fan reactions, and noteworthy moments from popular shows.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "30822d5c-d518-451c-b31f-44afa2a3b37a", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(website):\n", + " user_prompt = f\"\"\"The following text is extracted from a website titled: \"{website.title}\".\n", + "\n", + "Please analyze this content and provide a short and engaging summary in **Markdown format**.\n", + "\n", + "If the page contains:\n", + "- 🧵 Fashion trends: mention standout styles, designers, or events.\n", + "- 🗣️ TV gossip: highlight any drama, casting news, or fan reactions.\n", + "- 🎬 Celebrity updates (Hollywood/Bollywood): include relevant quotes, fashion moments, or event mentions.\n", + "- 📺 Show recaps: summarize what happened and any major twists.\n", + "\n", + "Keep the summary clear, fun, and informative. Use bullet points if multiple themes appear. If there is no meaningful content, say: *“No relevant summary could be generated.”*\n", + "\n", + "Website Content:\n", + "{website.text}\n", + "\"\"\"\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "5a25e90f-20a0-44ac-a96c-575ae974a45f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following text is extracted from a website titled: \"Latest and Trending Entertainment News, Celebrity News, Movie News, Breaking News | Entertainment - Times of India\".\n", + "\n", + "Please analyze this content and provide a short and engaging summary in **Markdown format**.\n", + "\n", + "If the page contains:\n", + "- 🧵 Fashion trends: mention standout styles, designers, or events.\n", + "- 🗣️ TV gossip: highlight any drama, casting news, or fan reactions.\n", + "- 🎬 Celebrity updates (Hollywood/Bollywood): include relevant quotes, fashion moments, or event mentions.\n", + "- 📺 Show recaps: summarize what happened and any major twists.\n", + "\n", + "Keep the summary clear, fun, and informative. Use bullet points if multiple themes appear. 
If there is no meaningful content, say: *“No relevant summary could be generated.”*\n", + "\n", + "Website Content:\n", + "Sign In\n", + "TOI\n", + "Go to\n", + "TOI\n", + "Etimes\n", + "home\n", + "cinema\n", + "news\n", + "movie reviews\n", + "movie listings\n", + "box office\n", + "anime\n", + "previews\n", + "did you know\n", + "videos\n", + "showtimes\n", + "blogs\n", + "awards\n", + "News\n", + "entertainment\n", + "Trending\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Javed Akhtar\n", + "Diljit Dosanjh\n", + "Jaideep Ahlawat\n", + "Karisma Kapoor\n", + "Gauri Khan\n", + "Blake Lively\n", + "Trisha Krishnan\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Housefull 5\n", + "Kuberaa Movie Review\n", + "Sitaare Zameen Par Movie Review\n", + "Sudhanshu: At 52, John, Dino all of them look like rockstars - EXCLUSIVE\n", + "Sudhanshu Pandey, recognized from 'Band Of Boys' and 'Anupama', defies his 50 years with his fitness. He credits his peers like Dino Moria, Arjun Rampal, and John Abraham for inspiring him to maintain a fit and youthful appearance. Pandey also admires Anil Kapoor's energy and dedication, motivating him to continue prioritizing fitness and inspiring others.\n", + "Previous\n", + "Sonakshi breaks silence on her rift with Luv and Kussh\n", + "Madhuri once chased Aamir with hockey stick for THIS reason\n", + "Ranbir-Raj Kapoor, Diljit-Hania, Samay-IGL: Top 5 news\n", + "Big B's savage reply to troll over cybercrime callertune\n", + "Anushka on keeping kids Vamika, Akaay away from public eye\n", + "Apoorva Mukhija recalls witnessing gender bias at home\n", + "Danish influencer seeks help to find papads from Big B\n", + "Sunjay Kapur's reception pics with Priya Sachdev goes viral\n", + "Big B schools trolls commenting 'buddha sathiya gaya hai'\n", + "Anushka on how she and Virat divide parenting duties\n", + "Brahmaji reacts to Vishnu's 7,000-acre land in New Zealand\n", + "Diljit says THIS amidst trolling for working with Hania\n", + "Riddhi found it ridiculous to like SRK's mother in Jawan\n", + "Priya Sachdev once called husband Sunjay Kapur ‘misunderstood’\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "Hindi\n", + "See All\n", + "Sudhanshu: At 52, John, Dino all of them look like rockstars - EXCLUSIVE\n", + "Sudhanshu Pandey, recognized from 'Band Of Boys' and 'Anupama', defies his 50 years with his fitness. He credits his peers like Dino Moria, Arjun Rampal, and John Abraham for inspiring him to maintain a fit and youthful appearance. 
Pandey also admires Anil Kapoor's energy and dedication, motivating him to continue prioritizing fitness and inspiring others.\n", + "Sonakshi breaks silence on her rift with Luv and Kussh\n", + "Madhuri once chased Aamir with hockey stick for THIS reason\n", + "Ranbir-Raj Kapoor, Diljit-Hania, Samay-IGL: Top 5 news\n", + "Anushka on keeping kids Vamika, Akaay away from public eye\n", + "Anushka Sharma and Virat Kohli are committed to shielding their children, Vamika and Akaay, from the constant glare of public attention. In a recent interview, Anushka emphasized the couple's focus on instilling strong values and ensuring a normal upbringing for their kids.\n", + "Apoorva Mukhija recalls witnessing gender bias at home\n", + "Regional\n", + "When Samantha’s class 10 mark sheet got leaked\n", + "Throwback to when a nostalgic memory made its way across the internet — Samantha Ruth Prabhu’s Class 10 mark sheet! The actress’s charming on-screen presence and grounded personality were once again in the spotlight as her old school report card began doing the rounds on social media.\n", + "Actor Tushar Ghadigaonkar passes away at 34\n", + "‘Kuberaa’ Twitter review: Netizens calls it a ‘Blockbuster’\n", + "Mammootty’s health- Brittas says actor doing well\n", + "Kavya Madhavan’s father P. Madhavan passes away\n", + "‘The Raja Saab’ teaser: Prabhas shines in this horror comedy\n", + "Mammootty’s father-in-law P S Abu passes away\n", + "Videos\n", + "See All\n", + "Previous\n", + "03:07\n", + "Ananya Panday’s Garden Bond With Parrots Wins Hearts\n", + "88 views | 2 hours ago\n", + "03:14\n", + "Sameera Reddy’s Healing Journey Through Yoga\n", + "31 views | 2 hours ago\n", + "03:13\n", + "Kriti Kharbanda’s Modern Maharani Look Stuns Instagram\n", + "26 views | 2 hours ago\n", + "03:12\n", + "Bobby Deol Meets Diljit Dosanjh: Punjabi Power Goes Viral\n", + "81 views | 2 hours ago\n", + "03:19\n", + "‘Sitaare Zameen Par’: Riteish Deshmukh’s Emotional Shoutout For Genelia’s Big Win\n", + "162 views | 2 hours ago\n", + "03:26\n", + "Varun Dhawan Stuns With 50 Push-Ups Alongside Army Cadets on Border 2 Set\n", + "21 views | 2 hours ago\n", + "03:00\n", + "VIDYA BALAN TURNS HEADS WITH CASUAL AIRPORT LOOK\n", + "16 views | 2 hours ago\n", + "03:05\n", + "MANDHIRA KAPUR BREAKS DOWN IN EMOTIONAL POST FOR LATE BROTHER SUNJAY KAPUR\n", + "1.2K views | 2 hours ago\n", + "03:28\n", + "SALMAN KHAN TAKES A BRUTAL DIG AT SOHAIL’S DIVORCE ON NATIONAL TV\n", + "185 views | 2 hours ago\n", + "03:15\n", + "RAJINIKANTH CAUSES FAN RIOT DURING ‘JAILER 2’ SHOOT IN MYSORE\n", + "26 views | 2 hours ago\n", + "03:10\n", + "IBRAHIM ALI KHAN KISSES HIS DOG AT AIRPORT IN HEARTWARMING FAREWELL\n", + "20 views | 3 hours ago\n", + "03:09\n", + "ANUPAMAA SET GUTTED IN MASSIVE FIRE | CREW ESCAPES, CINE BODY DEMANDS ACTION\n", + "1.2K views | 3 hours ago\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "10\n", + "11\n", + "World\n", + "See All\n", + "Aamir to Tom: Celebs on a mission to 'Save Cinema'\n", + "'How to Train Your Dragon' beats '28 Years Later' and 'Elio' to top the US box office on second weekend\n", + "Blake Lively is heartbroken after friendship ends with Taylor Swift; accepts the music mogul won't be returning - Deets inside\n", + "Selena-Hailey UNFOLLOW each other amid Bieber drama\n", + "Judge gives Baldoni access to Blake-Taylor messages\n", + "Trending Now\n", + "# Sidharth Malhotra-Kiara Advani\n", + "# AbRam Khan-Taimur Ali Khan\n", + "# Janhvi Kapoor\n", + "# Salman 
Khan\n", + "# Hema Malini\n", + "# Salman Khan\n", + "# Gauri Khan\n", + "# Shah Rukh Khan\n", + "# Chahatt Khanna\n", + "Visual Stories\n", + "See All\n", + "Previous\n", + "Kuberaa’s Sameera to Pushpa’s Srivalli: Rashmika Mandanna’s most iconic on-screen avatars\n", + "Ahaana Krishna’s ethereal photo series is straight out of a dream\n", + "Rashmika Mandanna to Rakul Preet Singh: Best pictures of the week featuring south actresses\n", + "Gauri Khan's most loved saree looks - An ode to modern day elegance\n", + "​South Indian beauties whose smiles will light up your Monday\n", + "Karishma Tanna Slays Every Frame\n", + "Tamannaah Bhatia’s traditional looks\n", + "Malavika Mohanan's radiant pics\n", + "​Neha Shetty stuns in every shade of blue\n", + "Thalapathy Vijay’s top 10 blockbuster movies worth re-watching!\n", + "​In pic: Mesmerizing looks of Shruti Haasan​\n", + "Dushara Vijayan’s Most Elegant Fashion Moments\n", + "Next\n", + "1\n", + "2\n", + "3\n", + "More Stories\n", + "Sonakshi Sinha breaks silence on her rumoured rift with brothers Luv and Kussh Sinha: 'My effort is always to support them...'\n", + "Madhuri Dixit once chased Aamir Khan with a hockey stick for THIS reason on sets of Dil: 'People fool you and you believe them'\n", + "Mohanlal declines to continue as president at AMMA’s general body meeting- Deets Inside\n", + "Blockbusters Ranbir Kapoor turned down: Films that became hits without him\n", + "Anushka Sharma reveals why she and Virat Kohli are keeping their children Vamika and Akaay away from the public eye: 'We don't want to raise brats'\n", + "Apoorva Mukhija recalls witnessing gender bias at home: 'My mother did it all, but father got credit for showing up at PTMs'\n", + "Amitabh Bachchan gives a savage reply to a troll over his viral cybercrime caller tune: 'Sarkar ko bolo bhai..'\n", + "Danish influencer asks fans to help her find papads from Amitabh Bachchan; netizens say 'he also used to grow basmati rice'\n", + "Days after his untimely demise, Sunjay Kapur's reception photos with Priya Sachdev goes viral; Looked dashing in hand embroidered shoes, written 'I do'\n", + "Priyanka Chopra Jonas recollects walking into a trap set by John Cena, Idris Elba on sets of 'Heads of State'\n", + "Bobby Deol's London vacation sparks fan frenzy: viral video shows actor posing for selfies outside restaurant\n", + "Amitabh Bachchah gives befitting replies to 'buddha sathiya gaya hai', ‘ganja’ comments by trolls: 'Ek din, Bhagwan naa kare voh din jaldi aaye...'\n", + "Sai Pallavi’s best performances\n", + "Brahmaji clears the air about Vishnu Manchu purchasing 7,000-acre land in New Zealand: 'I was pulling their leg as usual...'\n", + "Anushka Sharma reveals how she and Virat Kohli divide the parenting duties: 'I will be the primary caregiver, he plays round the year'\n", + "Ranbir Kapoor's 'Awara' look sparks rumours of Raj Kapoor tribute, Diljit Dosanjh slammed for working with Hania Aamir in Sardaar Ji 3: Top 5 news\n", + "Has Kiara Advani been approached to play Meena Kumari in her biopic? 
Here's what we know\n", + "Top 5 psychological Anime every thriller fan must watch\n", + "Load More Stories\n", + "# Latest Movies 2025\n", + "# Best Bollywood Movies 2025\n", + "# Hollywood Movie 2025\n", + "# Tamil Movies 2025\n", + "# Telugu Movies 2025\n", + "# Malayalam Movies 2025\n", + "# Kannada Movies 2025\n", + "# Marathi Movies 2025\n", + "# Bengali Movies 2025\n", + "# Top Rated Movies 2025\n", + "# Best Hindi Movies\n", + "# Best English Movies\n", + "Hot on the Web\n", + "Salman Khan\n", + "Karisma Kapoor\n", + "Jaideep Ahlawat\n", + "Blood Pressure\n", + "Big Cat Species\n", + "Trisha\n", + "Sitaare Zameen Par Review\n", + "Ancient Indigenous Tribes\n", + "Hair Growth Tips\n", + "Kidney Health\n", + "Kuberaa Review\n", + "Blake Lively\n", + "Reverse Fatty Liver\n", + "Skincare Hacks\n", + "Kuberaa Box Office Collection\n", + "Sitaare Zameen Par Box Office Collection\n", + "Baby Girl Names\n", + "Diljit Dosanjh\n", + "Kidney Disease Symptoms\n", + "Javed Akhtar\n", + "Heart Attack\n", + "Ram Kapoor Diet\n", + "Liver Damage\n", + "Kuberaa Movie Review\n", + "Gauri Khan\n", + "Baba Vanga Prediction\n", + "Baby Boy Names\n", + "Navjot Singh Sidhu\n", + "Housefull 5 Box Office Collection\n", + "DNA Movie Review\n", + "Kidney Damage Symptoms\n", + "Popular Waterfalls In India\n", + "Linkedin Ceo On AI Killing Jobs\n", + "Tesla Robotaxi\n", + "Early Cancer Detection\n", + "Harvard Research Reveals\n", + "American Destinations Explore Without Passport\n", + "Amouranth\n", + "Mouth Larvae\n", + "Doomsday Fish\n", + "Salman Khan AVM\n", + "Ginger Health Tips\n", + "Trending Topics\n", + "Latest Movies\n", + "Bollywood Movies\n", + "Hollywood Movies\n", + "Tamil Movies 2025\n", + "Telugu Movies 2025\n", + "Malayalam Movies 2025\n", + "Kannada Movies 2025\n", + "Marathi Movies 2025\n", + "Bengali Movies 2025\n", + "Top Rated Movies 2025\n", + "Best Hindi Movies\n", + "Best English Movies\n", + "Best Telugu Movies\n", + "Best Tamil Movies\n", + "Best Malayalam Movies\n", + "Best Kannada Movies\n", + "Best Bengali Movies\n", + "Upcoming Hindi Movies\n", + "Best Movies Of All Time\n", + "Best Hindi Movies of All Time\n", + "Latest English Movies\n", + "Latest Malayalam Movies\n", + "English TV News\n", + "Tamil TV News\n", + "Telugu TV News\n", + "Malayalam TV News\n", + "Kannada TV News\n", + "Movie Reviews\n", + "Bhojpuri Cinema News\n", + "Gujarati Cinema News\n", + "Popular Categories\n", + "Viral News\n", + "K Pop News\n", + "Web Series News\n", + "Anime News\n", + "Upcoming English Movies\n", + "Upcoming Tamil Movies\n", + "Upcoming Telugu Movies\n", + "Upcoming Malayalam Movies\n", + "Upcoming Kannada Movies\n", + "Fashion Tips\n", + "Travel News\n", + "Entertainment News\n", + "Bollywood News\n", + "Tollywood News\n", + "Kollywood News\n", + "Mollywood News\n", + "Food News\n", + "Latest Hindi Movies\n", + "Latest Tamil Movies\n", + "Parenting Tips\n", + "Home Remedies\n", + "Weight Loss\n", + "Beauty Tips\n", + "Parenting Tips\n", + "Hindi Videos\n", + "Hindi Video Songs\n", + "Bhojpuri Music Videos\n", + "Latest Telugu Movies\n", + "Bhojpuri Music Video\n", + "Hindi TV News\n", + "Latest News\n", + "NHL free agency turns spicy as Mitch Marner and Connor McDavid eye shorter deals to cash in later\n", + "Olive Ridley turtle washed ashore at Polem\n", + "Who is Thomas Fugate? 
Meet the 22-year-old leading Trump's terrorism unit amid Iran fiasco\n", + "'And that's why Putin's the boss': Trump rebukes former Russian President Medvedev; warns against treating 'N word casually'\n", + "Govt plans ₹10cr road on Bicholim-Dodamarg route\n", + "Former WWE star Batista eyed for Road House 2 sequel\n", + "Sonakshi Sinha breaks silence on her rumoured rift with brothers Luv and Kussh Sinha: 'My effort is always to support them...'\n", + "Andre Agassi and Steffi Graf’s son Jaden Agassi shows love for girlfriend Catherine Holt’s bold new photo from bedroom series\n", + "Is WWE planning to change Cody Rhodes’ iconic entrance theme song ‘Kingdom’?\n", + "Velumani says he didn’t attend RSS event in Coimbatore\n", + "Strait of Hormuz: Oil supply not an issue for India; 'pricing is a bigger concern,' what experts say\n", + "Madhuri Dixit once chased Aamir Khan with a hockey stick for THIS reason on sets of Dil: 'People fool you and you believe them'\n", + "As commissions fall, India’s ride-hailing firms test viability of flat-fee economics\n", + "Analysing what Trump’s strikes mean for Iran\n", + "Trump's clarification on 'Iran regime change' divides MAGA further: JD Vance, Hegseth, Marco Rubio 'humiliated'\n", + "Laughter Chefs 2: Krushna Abhishek roasts Rahul Vaidya for his in-famous feud with cricketer Virat Kohli\n", + "“I could have passed Dan Ticktum”: Edoardo Mortara regrets Attack Mode strategy at Jakarta E-Prix\n", + "India vs England Test: Sunil Gavaskar calls for Rishabh Pant's signature somersault celebration, wicketkeeper politely declines - WATCH\n", + "Copyright © 2025 Bennett, Coleman & Co. Ltd. All rights reserved. For reprint rights: Times Syndication Service\n", + "Follow us on\n", + "\n" + ] + } + ], + "source": [ + "print(user_prompt_for(gossip))" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "c039ab7c-88ee-475d-a93e-b26711d3ed4b", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(website):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(website)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "dd1fee35-6cc9-4995-8b5e-b93d80488364", + "metadata": {}, + "outputs": [], + "source": [ + "def summarize(url):\n", + " website = Website(url)\n", + " response = openai.chat.completions.create(\n", + " model = \"llama3.2\",\n", + " messages = messages_for(website)\n", + " )\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed09dad8-93bb-417e-b07b-183d2eba1ec5", + "metadata": {}, + "outputs": [], + "source": [ + "summarize(\"https://timesofindia.indiatimes.com/entertainment\")" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "id": "16a57eed-eba5-4f75-84f2-d44a67b36047", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summary(url):\n", + " summary = summarize(url)\n", + " display(Markdown(summary))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25af6217-6944-4c95-b156-0899dfcf0b83", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(\"https://timesofindia.indiatimes.com/entertainment\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29daa2d4-9d92-40ae-a0c4-dd2fdacf3f80", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From b840ea6b58ba79dd2ab63fb2f76b14628fd94c59 Mon Sep 17 00:00:00 2001 From: Sabine Fonderson | CEO Date: Fri, 27 Jun 2025 16:46:48 +0200 Subject: [PATCH 12/25] Add files via upload Hi Ed, --- .../clinic_booking_bot.ipynb | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 community-contributions/clinic_booking_bot.ipynb diff --git a/community-contributions/clinic_booking_bot.ipynb b/community-contributions/clinic_booking_bot.ipynb new file mode 100644 index 0000000..d2d8b57 --- /dev/null +++ b/community-contributions/clinic_booking_bot.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 170, + "id": "a1aa1b43-7a47-4aca-ae5f-94a9d4ba2d89", + "metadata": {}, + "outputs": [], + "source": [ + "## Clinic Booking Bot\n", + "\n", + "##Easily book your clinic visit – available only on weekdays between **14:00 and 15:00**. \n", + "##Speak or type, and get instant confirmation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "id": "fe798c6a-f8da-46aa-8c0e-9d2623def3d2", + "metadata": {}, + "outputs": [], + "source": [ + "# import library\n", + "\n", + "import os\n", + "import json\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "import base64\n", + "from io import BytesIO\n", + "from datetime import date\n", + "from PIL import Image, ImageDraw, ImageFont\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "id": "0ad4e526-e95d-4e70-9faa-b4236b105dd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenAI API Key exists and begins sk-proj-\n" + ] + } + ], + "source": [ + "# Save keys\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "MODEL = \"gpt-4o-mini\"\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "id": "ae95308e-0002-4017-9f2c-fcb1ddb248fa", + "metadata": {}, + "outputs": [], + "source": [ + "# --- CONFIG ---\n", + "BOOKING_START = 14\n", + "BOOKING_END = 15\n", + "WEEKDAYS = [\"Monday\", \"Tuesday\", \"Wednesday\", \"Thursday\", \"Friday\"]\n", + "PHONE = \"010-1234567\"\n", + "confirmed_bookings = []\n" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "id": "e21b0fd0-4cda-4938-8867-dc2c6e7af4b1", + "metadata": {}, + "outputs": [], + "source": [ + "# --- TTS ---\n", + "def generate_tts(text, voice=\"fable\", filename=\"output.mp3\"):\n", + " response = openai.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"fable\",\n", + " input=text\n", + " )\n", + " with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + " return filename" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "e28a5c3b-bd01-4845-a41e-87823f6bb078", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Translate Booking Confirmation ---\n", + "def translate_text(text, target_language=\"nl\"):\n", + " prompt = f\"Translate this message to {target_language}:\\n{text}\"\n", + " response = 
openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful translator.\"},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " )\n", + " return response.choices[0].message.content.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "8ed57cc9-7d54-4a5d-831b-0efcc5b7a7a9", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Booking Logic ---\n", + "def book_appointment(name, time_str):\n", + " try:\n", + " booking_time = datetime.strptime(time_str, \"%H:%M\")\n", + " except ValueError:\n", + " return \"Invalid time format. Use HH:MM.\", None, None\n", + "\n", + " hour = booking_time.hour\n", + " weekday = datetime.today().strftime(\"%A\")\n", + "\n", + " if weekday not in WEEKDAYS:\n", + " response = \"Bookings are only available on weekdays.\"\n", + " elif BOOKING_START <= hour < BOOKING_END:\n", + " confirmation = f\"Booking confirmed for {name} at {time_str}.\"\n", + " confirmed_bookings.append((name, time_str))\n", + " translated = translate_text(confirmation)\n", + " audio = generate_tts(translated)\n", + " image = generate_booking_image(name, time_str)\n", + " return translated, audio, image\n", + " else:\n", + " response = \"Sorry, bookings are only accepted between 14:00 and 15:00 on weekdays.\"\n", + " translated = translate_text(response)\n", + " audio = generate_tts(translated)\n", + " return translated, audio, None" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "19b52115-f0f3-4d63-a463-886163d4cfd1", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Booking Card ---\n", + "def generate_booking_image(name, time_str):\n", + " img = Image.new(\"RGB\", (500, 250), color=\"white\")\n", + " draw = ImageDraw.Draw(img)\n", + " msg = f\"\\u2705 Booking Confirmed\\nName: {name}\\nTime: {time_str}\"\n", + " draw.text((50, 100), msg, fill=\"black\")\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "id": "2c446b6c-d410-4ba1-b0c7-c475e5259ff5", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Voice Booking ---\n", + "def voice_booking(audio_path, name):\n", + " with open(audio_path, \"rb\") as f:\n", + " response = openai.audio.transcriptions.create(model=\"whisper-1\", file=f)\n", + " transcription = response.text.strip()\n", + "\n", + " system_prompt = \"\"\"\n", + " You are a clinic assistant. Extract only the appointment time from the user's sentence in 24-hour HH:MM format.\n", + " If no time is mentioned, respond with 'No valid time found.'\n", + " \"\"\"\n", + "\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": transcription}\n", + " ]\n", + " )\n", + " extracted_time = response.choices[0].message.content.strip()\n", + "\n", + " if \":\" in extracted_time:\n", + " return book_appointment(name, extracted_time)\n", + " else:\n", + " message = \"Sorry, I couldn't understand the time. Please try again.\"\n", + " translated = translate_text(message)\n", + " audio_path = generate_tts(translated)\n", + " return translated, audio_path, None" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "id": "121d2907-7fa8-4248-b2e7-83617ea66ff0", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Chat Bot Handler ---\n", + "def chat_bot(messages):\n", + " system_prompt = \"\"\"\n", + " You are a clinic booking assistant. 
Your job is to:\n", + " - Greet the patient and explain your role\n", + " - Only assist with making appointments\n", + " - Accept bookings only on weekdays between 14:00 and 15:00\n", + " - Do not provide medical advice\n", + " - Always respond with empathy and clarity\n", + " \"\"\"\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[{\"role\": \"system\", \"content\": system_prompt}] + messages\n", + " )\n", + " reply = response.choices[0].message.content.strip()\n", + " audio = generate_tts(reply)\n", + " return reply, audio" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "id": "2427b694-8c57-40cb-b202-4a8989547925", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7898\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Gradio interface\n", + "with gr.Blocks(theme=gr.themes.Soft()) as demo:\n", + " gr.Markdown(\"\"\"## 🩺 GP Booking Assistant \n", + "Only available weekdays between **14:00 and 15:00** \n", + "☎️ Contact: {PHONE}\n", + "---\"\"\")\n", + "\n", + " name_global = gr.Textbox(label=\"Your Name\", placeholder=\"Enter your name\", interactive=True)\n", + "\n", + " with gr.Tab(\"💬 Chat Mode\"):\n", + " chatbot = gr.Chatbot(label=\"Booking Chat\", type=\"messages\", height=400)\n", + " text_input = gr.Textbox(label=\"Type your message or use your voice below\")\n", + " audio_input = gr.Audio(type=\"filepath\", label=\"🎙️ Or speak your request\")\n", + " chat_audio_output = gr.Audio(label=\"🔊 Assistant's Reply\", type=\"filepath\")\n", + " send_btn = gr.Button(\"Send\")\n", + "\n", + " def handle_chat(user_message, chat_history):\n", + " chat_history = chat_history or []\n", + " chat_history.append({\"role\": \"user\", \"content\": user_message})\n", + " reply, audio = chat_bot(chat_history)\n", + " chat_history.append({\"role\": \"assistant\", \"content\": reply})\n", + " return chat_history, \"\", audio\n", + "\n", + " def handle_audio_chat(audio_path, chat_history):\n", + " with open(audio_path, \"rb\") as f:\n", + " transcription = openai.audio.transcriptions.create(model=\"whisper-1\", file=f).text.strip()\n", + " return handle_chat(transcription, chat_history)\n", + "\n", + " send_btn.click(handle_chat, [text_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + " text_input.submit(handle_chat, [text_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + " audio_input.change(handle_audio_chat, [audio_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + "\n", + "\n", + " \n", + " with gr.Tab(\"📝 Text Booking\"):\n", + " time_text = gr.Textbox(label=\"Preferred Time (HH:MM)\", placeholder=\"e.g., 14:30\")\n", + " btn_text = gr.Button(\"📅 Book via Text\")\n", + "\n", + " with gr.Tab(\"🎙️ Voice Booking\"):\n", + " voice_input = gr.Audio(type=\"filepath\", label=\"Say your preferred time\")\n", + " btn_voice = gr.Button(\"📅 Book via Voice\")\n", + "\n", + " output_text = gr.Textbox(label=\"Response\", interactive=False)\n", + " output_audio = gr.Audio(label=\"Audio Reply\", type=\"filepath\")\n", + " output_image = gr.Image(label=\"Booking Confirmation\")\n", + "\n", + " btn_text.click(fn=book_appointment, inputs=[name_global, time_text], outputs=[output_text, output_audio, output_image])\n", + " btn_voice.click(fn=voice_booking, inputs=[voice_input, name_global], outputs=[output_text, output_audio, output_image])\n", + "\n", + " gr.Markdown(\"\"\"---\n", + "This assistant does **not** give medical advice. 
It only books appointments within allowed hours.\n", + "\"\"\")\n", + "\n", + " demo.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f359de0a-28b1-4895-b21d-91d79e494a0d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 966a5f6c385a6d235d2e79bb2950a1943a66bc24 Mon Sep 17 00:00:00 2001 From: Sabine Fonderson | CEO Date: Fri, 27 Jun 2025 16:50:39 +0200 Subject: [PATCH 13/25] Add files via upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hi Ed, This is a Gradio-based booking assistant bot that: - Accepts text and voice inputs - Uses OpenAI for transcription and chat - Restricts booking times to weekdays 14:00–15:00 - Responds with translated audio confirmations Looking forward to your feedback! Sabine --- .../clinic_booking_bot.ipynb | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 week2/community-contributions/clinic_booking_bot.ipynb diff --git a/week2/community-contributions/clinic_booking_bot.ipynb b/week2/community-contributions/clinic_booking_bot.ipynb new file mode 100644 index 0000000..d2d8b57 --- /dev/null +++ b/week2/community-contributions/clinic_booking_bot.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 170, + "id": "a1aa1b43-7a47-4aca-ae5f-94a9d4ba2d89", + "metadata": {}, + "outputs": [], + "source": [ + "## Clinic Booking Bot\n", + "\n", + "##Easily book your clinic visit – available only on weekdays between **14:00 and 15:00**. 
\n", + "##Speak or type, and get instant confirmation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "id": "fe798c6a-f8da-46aa-8c0e-9d2623def3d2", + "metadata": {}, + "outputs": [], + "source": [ + "# import library\n", + "\n", + "import os\n", + "import json\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "import base64\n", + "from io import BytesIO\n", + "from datetime import date\n", + "from PIL import Image, ImageDraw, ImageFont\n" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "id": "0ad4e526-e95d-4e70-9faa-b4236b105dd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenAI API Key exists and begins sk-proj-\n" + ] + } + ], + "source": [ + "# Save keys\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "MODEL = \"gpt-4o-mini\"\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "id": "ae95308e-0002-4017-9f2c-fcb1ddb248fa", + "metadata": {}, + "outputs": [], + "source": [ + "# --- CONFIG ---\n", + "BOOKING_START = 14\n", + "BOOKING_END = 15\n", + "WEEKDAYS = [\"Monday\", \"Tuesday\", \"Wednesday\", \"Thursday\", \"Friday\"]\n", + "PHONE = \"010-1234567\"\n", + "confirmed_bookings = []\n" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "id": "e21b0fd0-4cda-4938-8867-dc2c6e7af4b1", + "metadata": {}, + "outputs": [], + "source": [ + "# --- TTS ---\n", + "def generate_tts(text, voice=\"fable\", filename=\"output.mp3\"):\n", + " response = openai.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"fable\",\n", + " input=text\n", + " )\n", + " with open(filename, \"wb\") as f:\n", + " f.write(response.content)\n", + " return filename" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "e28a5c3b-bd01-4845-a41e-87823f6bb078", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Translate Booking Confirmation ---\n", + "def translate_text(text, target_language=\"nl\"):\n", + " prompt = f\"Translate this message to {target_language}:\\n{text}\"\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful translator.\"},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " )\n", + " return response.choices[0].message.content.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "8ed57cc9-7d54-4a5d-831b-0efcc5b7a7a9", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Booking Logic ---\n", + "def book_appointment(name, time_str):\n", + " try:\n", + " booking_time = datetime.strptime(time_str, \"%H:%M\")\n", + " except ValueError:\n", + " return \"Invalid time format. 
Use HH:MM.\", None, None\n", + "\n", + " hour = booking_time.hour\n", + " weekday = datetime.today().strftime(\"%A\")\n", + "\n", + " if weekday not in WEEKDAYS:\n", + " response = \"Bookings are only available on weekdays.\"\n", + " elif BOOKING_START <= hour < BOOKING_END:\n", + " confirmation = f\"Booking confirmed for {name} at {time_str}.\"\n", + " confirmed_bookings.append((name, time_str))\n", + " translated = translate_text(confirmation)\n", + " audio = generate_tts(translated)\n", + " image = generate_booking_image(name, time_str)\n", + " return translated, audio, image\n", + " else:\n", + " response = \"Sorry, bookings are only accepted between 14:00 and 15:00 on weekdays.\"\n", + " translated = translate_text(response)\n", + " audio = generate_tts(translated)\n", + " return translated, audio, None" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "19b52115-f0f3-4d63-a463-886163d4cfd1", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Booking Card ---\n", + "def generate_booking_image(name, time_str):\n", + " img = Image.new(\"RGB\", (500, 250), color=\"white\")\n", + " draw = ImageDraw.Draw(img)\n", + " msg = f\"\\u2705 Booking Confirmed\\nName: {name}\\nTime: {time_str}\"\n", + " draw.text((50, 100), msg, fill=\"black\")\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "id": "2c446b6c-d410-4ba1-b0c7-c475e5259ff5", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Voice Booking ---\n", + "def voice_booking(audio_path, name):\n", + " with open(audio_path, \"rb\") as f:\n", + " response = openai.audio.transcriptions.create(model=\"whisper-1\", file=f)\n", + " transcription = response.text.strip()\n", + "\n", + " system_prompt = \"\"\"\n", + " You are a clinic assistant. Extract only the appointment time from the user's sentence in 24-hour HH:MM format.\n", + " If no time is mentioned, respond with 'No valid time found.'\n", + " \"\"\"\n", + "\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": transcription}\n", + " ]\n", + " )\n", + " extracted_time = response.choices[0].message.content.strip()\n", + "\n", + " if \":\" in extracted_time:\n", + " return book_appointment(name, extracted_time)\n", + " else:\n", + " message = \"Sorry, I couldn't understand the time. Please try again.\"\n", + " translated = translate_text(message)\n", + " audio_path = generate_tts(translated)\n", + " return translated, audio_path, None" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "id": "121d2907-7fa8-4248-b2e7-83617ea66ff0", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Chat Bot Handler ---\n", + "def chat_bot(messages):\n", + " system_prompt = \"\"\"\n", + " You are a clinic booking assistant. 
Your job is to:\n", + " - Greet the patient and explain your role\n", + " - Only assist with making appointments\n", + " - Accept bookings only on weekdays between 14:00 and 15:00\n", + " - Do not provide medical advice\n", + " - Always respond with empathy and clarity\n", + " \"\"\"\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4\",\n", + " messages=[{\"role\": \"system\", \"content\": system_prompt}] + messages\n", + " )\n", + " reply = response.choices[0].message.content.strip()\n", + " audio = generate_tts(reply)\n", + " return reply, audio" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "id": "2427b694-8c57-40cb-b202-4a8989547925", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7898\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Gradio interface\n", + "with gr.Blocks(theme=gr.themes.Soft()) as demo:\n", + " gr.Markdown(\"\"\"## 🩺 GP Booking Assistant \n", + "Only available weekdays between **14:00 and 15:00** \n", + "☎️ Contact: {PHONE}\n", + "---\"\"\")\n", + "\n", + " name_global = gr.Textbox(label=\"Your Name\", placeholder=\"Enter your name\", interactive=True)\n", + "\n", + " with gr.Tab(\"💬 Chat Mode\"):\n", + " chatbot = gr.Chatbot(label=\"Booking Chat\", type=\"messages\", height=400)\n", + " text_input = gr.Textbox(label=\"Type your message or use your voice below\")\n", + " audio_input = gr.Audio(type=\"filepath\", label=\"🎙️ Or speak your request\")\n", + " chat_audio_output = gr.Audio(label=\"🔊 Assistant's Reply\", type=\"filepath\")\n", + " send_btn = gr.Button(\"Send\")\n", + "\n", + " def handle_chat(user_message, chat_history):\n", + " chat_history = chat_history or []\n", + " chat_history.append({\"role\": \"user\", \"content\": user_message})\n", + " reply, audio = chat_bot(chat_history)\n", + " chat_history.append({\"role\": \"assistant\", \"content\": reply})\n", + " return chat_history, \"\", audio\n", + "\n", + " def handle_audio_chat(audio_path, chat_history):\n", + " with open(audio_path, \"rb\") as f:\n", + " transcription = openai.audio.transcriptions.create(model=\"whisper-1\", file=f).text.strip()\n", + " return handle_chat(transcription, chat_history)\n", + "\n", + " send_btn.click(handle_chat, [text_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + " text_input.submit(handle_chat, [text_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + " audio_input.change(handle_audio_chat, [audio_input, chatbot], [chatbot, text_input, chat_audio_output])\n", + "\n", + "\n", + " \n", + " with gr.Tab(\"📝 Text Booking\"):\n", + " time_text = gr.Textbox(label=\"Preferred Time (HH:MM)\", placeholder=\"e.g., 14:30\")\n", + " btn_text = gr.Button(\"📅 Book via Text\")\n", + "\n", + " with gr.Tab(\"🎙️ Voice Booking\"):\n", + " voice_input = gr.Audio(type=\"filepath\", label=\"Say your preferred time\")\n", + " btn_voice = gr.Button(\"📅 Book via Voice\")\n", + "\n", + " output_text = gr.Textbox(label=\"Response\", interactive=False)\n", + " output_audio = gr.Audio(label=\"Audio Reply\", type=\"filepath\")\n", + " output_image = gr.Image(label=\"Booking Confirmation\")\n", + "\n", + " btn_text.click(fn=book_appointment, inputs=[name_global, time_text], outputs=[output_text, output_audio, output_image])\n", + " btn_voice.click(fn=voice_booking, inputs=[voice_input, name_global], outputs=[output_text, output_audio, output_image])\n", + "\n", + " gr.Markdown(\"\"\"---\n", + "This assistant does **not** give medical advice. 
It only books appointments within allowed hours.\n", + "\"\"\")\n", + "\n", + " demo.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f359de0a-28b1-4895-b21d-91d79e494a0d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ccfab49b8c9bcf7ed29f40c6b848e5821e0b766d Mon Sep 17 00:00:00 2001 From: Kunmeer-SyedMohamedHyder Date: Tue, 1 Jul 2025 19:47:05 +0530 Subject: [PATCH 14/25] Week 2 Exercise of FlightAI ChatBot --- week2/week2 EXERCISE.ipynb | 605 ++++++++++++++++++++++++++++++++++++- 1 file changed, 604 insertions(+), 1 deletion(-) diff --git a/week2/week2 EXERCISE.ipynb b/week2/week2 EXERCISE.ipynb index d97f5cb..f6c96ca 100644 --- a/week2/week2 EXERCISE.ipynb +++ b/week2/week2 EXERCISE.ipynb @@ -24,6 +24,609 @@ "id": "a07e7793-b8f5-44f4-aded-5562f633271a", "metadata": {}, "outputs": [], + "source": [ + "# Imports\n", + "\n", + "import os\n", + "import json\n", + "import base64\n", + "import logging\n", + "import gradio as gr\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Audio, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e879f6ae-b246-479d-8f81-94e47a9072ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialization\n", + "logging.basicConfig(level=logging.INFO)\n", + "load_dotenv(override=True)\n", + "\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if openai_api_key:\n", + " logging.info(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " logging.error(\"OpenAI API Key not set\")\n", + " \n", + "MODEL = \"gpt-4o-mini\"\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4455169-9e5e-4171-92e8-6f850a06f6e3", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = (\n", + " \"You are a helpful assistant for an airline called FlightAI. \"\n", + " \"Always respond in a short, courteous sentence. \"\n", + " \"Provide accurate information only. \"\n", + " \"If you don’t know something, say so clearly. \"\n", + " \"Before booking a ticket, strictly follow this order: \"\n", + " \"1) Check if the destination is available, \"\n", + " \"2) Then check the ticket price, \"\n", + " \"3) Collect all neccessary details like name, destination and date of journey, \"\n", + " \"4) Only then proceed with the booking. 
\"\n", + " \"Always use the appropriate tools or APIs for each step before confirming a booking.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bab8e2c-e2b1-4421-a95b-7f1251670817", + "metadata": {}, + "outputs": [], + "source": [ + "# Dummy funcs that mimic the ticket booking behaviour\n", + "# Replace these will real funcs (that call APIs or make DB transactions) to actually book a ticket\n", + "\n", + "ticket_prices = {\n", + " \"london\": \"$799\",\n", + " \"paris\": \"$899\",\n", + " \"tokyo\": \"$1400\",\n", + " \"berlin\": \"$499\"\n", + "}\n", + "\n", + "def check_destination_availability(destination: str) -> dict:\n", + " \"\"\"\n", + " Check if the given destination is available in our ticketing system.\n", + " \n", + " Args:\n", + " destination (str): The name of the city.\n", + " \n", + " Returns:\n", + " dict: {\"available\": bool}\n", + " \"\"\"\n", + " logging.info(f\"Checking availability for destination: {destination}\")\n", + " \n", + " available = destination.lower() in ticket_prices\n", + " return {\"available\": available}\n", + "\n", + "\n", + "def fetch_ticket_price(destination_city: str) -> dict:\n", + " \"\"\"\n", + " Retrieve the ticket price for a given city.\n", + " \n", + " Args:\n", + " destination_city (str): The name of the destination city.\n", + " \n", + " Returns:\n", + " dict: {\"price\": str} or {\"price\": \"Unknown\"} if not found\n", + " \"\"\"\n", + " logging.info(f\"Retrieving price for destination: {destination_city}\")\n", + " \n", + " city = destination_city.lower()\n", + " price = ticket_prices.get(city, \"Unknown\")\n", + " \n", + " return {\"price\": price}\n", + "\n", + "\n", + "def book_ticket(name: str, destination_city: str, journey_date: str) -> dict:\n", + " \"\"\"\n", + " Book a ticket to a destination city for a given user and date.\n", + " \n", + " Args:\n", + " name (str): Name of the passenger.\n", + " destination_city (str): Destination city.\n", + " journey_date (str): Date of journey in YYYY-MM-DD format.\n", + " \n", + " Returns:\n", + " dict: Booking confirmation with name, city, price, and date, or error.\n", + " \"\"\"\n", + " logging.info(f\"Booking ticket for {name} to {destination_city} on {journey_date}\")\n", + " \n", + " city = destination_city.lower()\n", + "\n", + " if city not in ticket_prices:\n", + " logging.error(f\"City '{destination_city}' not found in ticket list.\")\n", + " return {\"error\": \"Destination not found.\"}\n", + "\n", + " price_info = fetch_ticket_price(destination_city)\n", + " \n", + " return {\n", + " \"name\": name,\n", + " \"destination_city\": destination_city.title(),\n", + " \"journey_date\": journey_date,\n", + " \"price\": price_info[\"price\"]\n", + " }\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400f4592-2326-43f6-a921-fcd051c4f022", + "metadata": {}, + "outputs": [], + "source": [ + "destination_availability_tool = {\n", + " \"name\": \"check_destination_availability\",\n", + " \"description\": \"Check if tickets are available for the given destination city before proceeding with any booking or pricing inquiry.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The name of the destination city to check for availability.\"\n", + " }\n", + " },\n", + " \"required\": [\"destination\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "ticket_price_tool = {\n", + " \"name\": 
\"fetch_ticket_price\",\n", + " \"description\": (\n", + " \"Get the price of a return ticket to the specified destination city. \"\n", + " \"Use this after confirming that the destination is available, especially when the customer asks for the ticket price.\"\n", + " ),\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city for which the customer wants the ticket price.\"\n", + " }\n", + " },\n", + " \"required\": [\"destination_city\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "ticket_booking_tool = {\n", + " \"name\": \"book_ticket\",\n", + " \"description\": (\n", + " \"Book a ticket for the customer to the specified destination city on the given journey date. \"\n", + " \"Use only after availability and price have been checked.\"\n", + " ),\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"name\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Full name of the person booking the ticket.\"\n", + " },\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city that the customer wants to travel to.\"\n", + " },\n", + " \"journey_date\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date\",\n", + " \"description\": \"The journey date in YYYY-MM-DD format.\"\n", + " }\n", + " },\n", + " \"required\": [\"name\", \"destination_city\", \"journey_date\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "tools = [\n", + " {\"type\": \"function\", \"function\": destination_availability_tool},\n", + " {\"type\": \"function\", \"function\": ticket_price_tool},\n", + " {\"type\": \"function\", \"function\": ticket_booking_tool},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02c17ba-14f2-41c4-b6a2-d1397405d368", + "metadata": {}, + "outputs": [], + "source": [ + "def handle_tool_call(message):\n", + " \"\"\"\n", + " Handles a single OpenAI tool call message and returns both the result\n", + " and a formatted tool response dictionary.\n", + " \n", + " Args:\n", + " message (object): An OpenAI message containing a tool call.\n", + " \n", + " Returns:\n", + " tuple: (result_dict, response_dict)\n", + " \"\"\"\n", + " tool_call = message.tool_calls[0]\n", + " function_name = tool_call.function.name\n", + " arguments = json.loads(tool_call.function.arguments)\n", + "\n", + " result = None\n", + "\n", + " logging.info(f\"Tool call received: {function_name} with arguments: {arguments}\")\n", + "\n", + " if function_name == \"check_destination_availability\":\n", + " result = check_destination_availability(**arguments)\n", + "\n", + " elif function_name == \"fetch_ticket_price\":\n", + " city = arguments.get(\"destination_city\")\n", + " price_info = fetch_ticket_price(city)\n", + " result = {\"destination_city\": city, \"price\": price_info[\"price\"]}\n", + "\n", + " elif function_name == \"book_ticket\":\n", + " result = book_ticket(**arguments)\n", + "\n", + " else:\n", + " logging.warning(\"Unrecognized tool function: %s\", function_name)\n", + " result = {\"error\": f\"Unknown function '{function_name}'\"}\n", + "\n", + " response = {\n", + " \"role\": \"tool\",\n", + " \"tool_call_id\": tool_call.id,\n", + " \"content\": json.dumps(result)\n", + " }\n", + "\n", + " return result, response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"72c1a9e7-186c-4218-9edc-01814baec431", + "metadata": {}, + "outputs": [], + "source": [ + "def artist(city: str, style: str = \"vibrant pop-art\", size: str = \"1024x1024\") -> Image.Image:\n", + " \"\"\"\n", + " Generates a city-themed vacation image using DALL·E.\n", + "\n", + " Args:\n", + " city (str): Name of the city to visualize.\n", + " style (str): Artistic style for the image prompt.\n", + " size (str): Image resolution (e.g., \"1024x1024\").\n", + "\n", + " Returns:\n", + " Image.Image: A PIL Image object representing the generated image.\n", + "\n", + " Raises:\n", + " ValueError: If city name is empty.\n", + " RuntimeError: If image generation fails.\n", + " \"\"\"\n", + " if not city.strip():\n", + " raise ValueError(\"City name cannot be empty.\")\n", + "\n", + " prompt = (\n", + " f\"An image representing a vacation in {city}, \"\n", + " f\"showing iconic tourist attractions, cultural elements, and everything unique about {city}, \"\n", + " f\"rendered in a {style} style.\"\n", + " )\n", + "\n", + " logging.info(\"Generating image for city: %s with style: %s\", city, style)\n", + "\n", + " try:\n", + " response = openai.images.generate(\n", + " model=\"dall-e-3\",\n", + " prompt=prompt,\n", + " size=size,\n", + " n=1,\n", + " response_format=\"b64_json\",\n", + " )\n", + "\n", + " image_base64 = response.data[0].b64_json\n", + " image_data = base64.b64decode(image_base64)\n", + " logging.info(\"Image generation successful for %s\", city)\n", + "\n", + " return Image.open(BytesIO(image_data))\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Failed to generate image for city '%s': %s\", city, str(e))\n", + " raise RuntimeError(f\"Image generation failed for city '{city}'\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf7c091-6c68-4af6-8197-c1456b36cedf", + "metadata": {}, + "outputs": [], + "source": [ + "def talker(message: str, output_filename: str = \"output_audio.mp3\", autoplay: bool = True) -> None:\n", + " \"\"\"\n", + " Converts a text message into speech using OpenAI TTS and plays the audio.\n", + "\n", + " Args:\n", + " message (str): The text to convert to speech.\n", + " output_filename (str): The filename to save the generated audio.\n", + " autoplay (bool): Whether to autoplay the audio in the notebook.\n", + "\n", + " Raises:\n", + " ValueError: If the message is empty.\n", + " RuntimeError: If the audio generation fails.\n", + " \"\"\"\n", + " if not message.strip():\n", + " raise ValueError(\"Message cannot be empty.\")\n", + "\n", + " logging.info(\"Generating speech for message: %s\", message)\n", + "\n", + " try:\n", + " response = openai.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"alloy\",\n", + " input=message\n", + " )\n", + "\n", + " with open(output_filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "\n", + " logging.info(\"Audio written to: %s\", output_filename)\n", + "\n", + " if autoplay:\n", + " display(Audio(output_filename, autoplay=True))\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Failed to generate or play audio: %s\", str(e))\n", + " raise RuntimeError(\"Text-to-speech generation failed.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54568b4a-be8d-47a1-b924-03acdafef70e", + "metadata": {}, + "outputs": [], + "source": [ + "def translate(message, language):\n", + " \"\"\"\n", + " Translates the given text into the specified language using OpenAI Chat API.\n", + "\n", + " Args:\n", + " message (str): The 
text to be translated.\n", + " language (str): Target language for translation (e.g., 'French', 'Japanese').\n", + "\n", + " Returns:\n", + " str: Translated text.\n", + "\n", + " Raises:\n", + " ValueError: If input message or language is empty.\n", + " RuntimeError: If translation fails due to API or other issues.\n", + " \"\"\"\n", + " if not message.strip():\n", + " raise ValueError(\"Input message cannot be empty.\")\n", + " if not language.strip():\n", + " raise ValueError(\"Target language cannot be empty.\")\n", + "\n", + " logging.info(\"Translating to %s: %s\", language, message)\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": f\"You are a translation assistant. Translate everything the user says to {language}.\"},\n", + " {\"role\": \"user\", \"content\": message}\n", + " ]\n", + "\n", + " try:\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=messages\n", + " )\n", + " translated = response.choices[0].message.content.strip()\n", + " logging.info(\"Translation successful.\")\n", + " return translated\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Translation failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to translate message.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e6cf470-8ea0-43b2-bbcc-53c2432feb0d", + "metadata": {}, + "outputs": [], + "source": [ + "def transcribe_audio(audio_path):\n", + " \"\"\"\n", + " Transcribes an audio file using OpenAI's Whisper model.\n", + "\n", + " Args:\n", + " audio_path (str): Path to the audio file (e.g., .mp3, .wav).\n", + " model (str): OpenAI model for transcription (default: 'whisper-1').\n", + "\n", + " Returns:\n", + " str: Transcribed text from the audio file.\n", + "\n", + " Raises:\n", + " ValueError: If the path is invalid or the file does not exist.\n", + " RuntimeError: If the transcription fails.\n", + " \"\"\"\n", + " if not audio_path or not os.path.exists(audio_path):\n", + " raise ValueError(\"Invalid or missing audio file path.\")\n", + "\n", + " logging.info(\"Transcribing audio file: %s using model: whisper-1\", audio_path)\n", + "\n", + " try:\n", + " with open(audio_path, \"rb\") as f:\n", + " response = openai.audio.transcriptions.create(\n", + " model=\"whisper-1\",\n", + " file=f\n", + " )\n", + " transcript = response.text.strip()\n", + " logging.info(\"Transcription successful.\")\n", + " return transcript\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Transcription failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to transcribe audio.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3489656e-0f08-4d41-94b1-d902c93ca164", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(history: list, language: str, translated_history: list, speaking_language: str) -> tuple:\n", + " \"\"\"\n", + " Handles a chat interaction including tool calls, image generation, translation, and TTS playback.\n", + "\n", + " Args:\n", + " history (list): List of previous conversation messages.\n", + " language (str): Target language for translation and TTS.\n", + "\n", + " Returns:\n", + " tuple: (updated history list, generated image if any, translated response string)\n", + " \"\"\"\n", + " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n", + " image = None\n", + "\n", + " try:\n", + " # Initial assistant response\n", + " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n", + " 
choice = response.choices[0]\n", + "\n", + " # Handle tool calls if triggered\n", + " if choice.finish_reason == \"tool_calls\":\n", + " message = choice.message\n", + " result, tool_response = handle_tool_call(message)\n", + "\n", + " # Append tool-related messages\n", + " messages.append(message)\n", + " messages.append(tool_response)\n", + " logging.info(\"Tool call result: %s\", result)\n", + "\n", + " # Generate image if a booking was completed\n", + " if message.tool_calls[0].function.name == \"book_ticket\" and \"destination_city\" in result:\n", + " image = artist(result[\"destination_city\"])\n", + "\n", + " # Get final assistant response after tool execution\n", + " response = openai.chat.completions.create(model=MODEL, messages=messages)\n", + " choice = response.choices[0]\n", + "\n", + " reply = choice.message.content.strip()\n", + " history.append({\"role\": \"assistant\", \"content\": reply})\n", + "\n", + " # Translate and speak the reply\n", + " translated_reply = translate(reply, language)\n", + " translated_history.append({\"role\": \"assistant\", \"content\": translated_reply})\n", + "\n", + " if speaking_language == \"English\":\n", + " talker(reply)\n", + " else:\n", + " talker(translated_reply)\n", + "\n", + " return history, image, translated_history\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Chat processing failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to complete chat interaction.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f76acc68-726e-457f-88ab-99da75debde5", + "metadata": {}, + "outputs": [], + "source": [ + "force_dark_mode = \"\"\"\n", + "function refresh() {\n", + " const url = new URL(window.location);\n", + " if (url.searchParams.get('__theme') !== 'dark') {\n", + " url.searchParams.set('__theme', 'dark');\n", + " window.location.href = url.href;\n", + " }\n", + "}\n", + "\"\"\"\n", + "\n", + "with gr.Blocks(js=force_dark_mode) as ui:\n", + " with gr.Row():\n", + " gr.Markdown(\"### FlightAI Chat with Translation\")\n", + "\n", + " with gr.Row():\n", + " lang_dropdown = gr.Dropdown(\n", + " choices=[\"Spanish\", \"French\", \"German\", \"Japanese\", \"Hindi\"],\n", + " value=\"Spanish\",\n", + " label=\"Translate To\"\n", + " )\n", + " \n", + " speak_dropdown = gr.Dropdown(\n", + " choices=[\"English\", \"Selected Language\"],\n", + " value=\"English\",\n", + " label=\"Speak out in\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Chat History\")\n", + " translated_chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Translated Chat\")\n", + " image_output = gr.Image(height=500)\n", + "\n", + " with gr.Row():\n", + " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n", + " audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Or speak to the assistant\")\n", + "\n", + " with gr.Row():\n", + " clear = gr.Button(\"Clear\")\n", + "\n", + " def do_entry(message, history, audio, translated_history, language):\n", + " if audio:\n", + " message = transcribe_audio(audio)\n", + "\n", + " if message:\n", + " history += [{\"role\": \"user\", \"content\": message}]\n", + " translated_history += [{\"role\": \"user\", \"content\": translate(message, language)}]\n", + " return \"\", history, None, translated_history\n", + "\n", + " entry.submit(\n", + " do_entry,\n", + " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", + " outputs=[entry, chatbot, audio_input, 
translated_chatbot]\n", + " ).then(\n", + " chat,\n", + " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", + " outputs=[chatbot, image_output, translated_chatbot]\n", + " )\n", + "\n", + " audio_input.change(\n", + " do_entry,\n", + " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", + " outputs=[entry, chatbot, audio_input, translated_chatbot]\n", + " ).then(\n", + " chat,\n", + " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", + " outputs=[chatbot, image_output, translated_chatbot]\n", + " )\n", + "\n", + " clear.click(lambda: [\"\", [], None, [], None], inputs=None, outputs=[entry, chatbot, audio_input, translated_chatbot, image_output], queue=False)\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f97435-fa0d-45f7-b02f-4ac5f4901c53", + "metadata": {}, + "outputs": [], "source": [] } ], @@ -43,7 +646,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.10.6" } }, "nbformat": 4, From 7e3ddf460da5fc3e42c5461deb66a096c240cd95 Mon Sep 17 00:00:00 2001 From: Mike Date: Sun, 6 Jul 2025 17:59:51 +0100 Subject: [PATCH 15/25] Added a multimodal chatbot interface project input to community contributions --- .../multi-agent_gui_with_gradio/README.md | 25 ++ .../agentic_voice_text_support.ipynb | 395 ++++++++++++++++++ 2 files changed, 420 insertions(+) create mode 100644 community-contributions/multi-agent_gui_with_gradio/README.md create mode 100644 community-contributions/multi-agent_gui_with_gradio/agentic_voice_text_support.ipynb diff --git a/community-contributions/multi-agent_gui_with_gradio/README.md b/community-contributions/multi-agent_gui_with_gradio/README.md new file mode 100644 index 0000000..3c80ace --- /dev/null +++ b/community-contributions/multi-agent_gui_with_gradio/README.md @@ -0,0 +1,25 @@ +# 🧠 Agentic Voice/Text Support Chatbot + +A multimodal chatbot interface with support for **text and voice input**, **multiple large language models (LLMs)**, and **context memory persistence** — all in a single Gradio-based GUI. 
+ +## 🚀 Features + +- 🔄 **Multi-LLM switching**: Dynamically switch between OpenAI, Anthropic Claude, and Meta LLaMA (via Ollama) +- 🎤 **Voice input**: Use your microphone with live speech-to-text transcription +- 💬 **Contextual memory**: Maintain chat history even when switching models +- 🧪 **Prototype-ready**: Built with Gradio for rapid GUI testing and development + +## 🛠️ Technologies Used + +- [Gradio](https://www.gradio.app/) – GUI interface +- [OpenAI API](https://platform.openai.com/) +- [Anthropic Claude API](https://www.anthropic.com/) +- [Ollama](https://ollama.com/) – Local LLaMA inference +- [`speech_recognition`](https://pypi.org/project/SpeechRecognition/) – Voice-to-text +- `sounddevice`, `numpy` – Audio recording +- `.env` – Environment variable management + +## You’ll also need: +- API keys for OpenAI and Claude +- Ollama installed locally to run LLaMA models +- A .env file with the necessary API keys diff --git a/community-contributions/multi-agent_gui_with_gradio/agentic_voice_text_support.ipynb b/community-contributions/multi-agent_gui_with_gradio/agentic_voice_text_support.ipynb new file mode 100644 index 0000000..d4f6caf --- /dev/null +++ b/community-contributions/multi-agent_gui_with_gradio/agentic_voice_text_support.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd", + "metadata": {}, + "source": [ + "### Building a Chatbot Interface, with Text or Voice Input, Multi-LLM support, and Memory Persistence" + ] + }, + { + "cell_type": "markdown", + "id": "eeb20b3e", + "metadata": {}, + "source": [ + "In this tutorial, we’ll use Gradio to build a simple chatbot prototype with a user-friendly interface. The chatbot will support multiple language models, allowing the user to switch models at any point during the conversation. It will also offer optional memory persistence, where the chat history is stored and forwarded to the selected model — which allows shared memory across models, even when switching mid-chat.\n", + "\n", + "In this project, we'll use OpenAI's API, Anthropic's Claude, and Meta's LLaMA, which runs locally via an Ollama server. Additionally, we'll use Python’s speech_recognition module to convert speech to text.\n", + "\n", + "It's worth noting that some APIs — such as OpenAI's — now support direct audio input, so integrating speech capabilities can also be done end-to-end without a separate transcription module." 
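+ "\n",
+ "Before running the cells below, a `.env` file is expected in the working directory. A minimal sketch, using placeholder values and the same variable names that are read via `os.getenv` later in the notebook, might look like:\n",
+ "\n",
+ "```\n",
+ "OPENAI_API_KEY=sk-proj-...\n",
+ "ANTHROPIC_API_KEY=sk-ant-...\n",
+ "# GOOGLE_API_KEY is optional here; the notebook only reports whether it is set\n",
+ "GOOGLE_API_KEY=...\n",
+ "```\n",
+ "\n",
+ "Ollama is assumed to be running locally and serving the chosen LLaMA model; the handler below defaults to `http://localhost:11434/api/chat`.\n"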
+ ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "a07e7793-b8f5-44f4-aded-5562f633271a", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import anthropic" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "a0a343b1", + "metadata": {}, + "outputs": [], + "source": [ + "# Speech recording and recognition libraries\n", + "import speech_recognition as sr\n", + "import sounddevice as sd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "d7693eda", + "metadata": {}, + "outputs": [], + "source": [ + "# GUI prototyping\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "41ffc0e6", + "metadata": {}, + "outputs": [], + "source": [ + "buffer = [] # For temporarily holding sound recording\n", + "\n", + "# Helper function for handling voice recording\n", + "def callback(indata, frames, time, status):\n", + " buffer.append(indata.copy())\n", + "\n", + "stream = sd.InputStream(callback=callback, samplerate=16000, channels=1, dtype='int16')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "e9a79075", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Function for handling recording data and status\n", + "def toggle_recording(state):\n", + " global stream, buffer\n", + " print('state', state)\n", + "\n", + " if not state:\n", + " buffer.clear()\n", + " stream.start()\n", + " return gr.update(value=\"Stop Recording\"), 'Recording...', not state\n", + " else:\n", + " stream.stop()\n", + " audio = np.concatenate(buffer, axis=0)\n", + " text = transcribe(audio)\n", + " return gr.update(value=\"Start Recording\"), text, not state\n", + "\n", + "# Functio that converts speech to text via Google's voice recognition module\n", + "def transcribe(recording, sample_rate=16000):\n", + " r = sr.Recognizer()\n", + "\n", + " # Convert NumPy array to AudioData\n", + " audio_data = sr.AudioData(\n", + " recording.tobytes(), # Raw byte data\n", + " sample_rate, # Sample rate\n", + " 2 # Sample width in bytes (16-bit = 2 bytes)\n", + " )\n", + "\n", + " text = r.recognize_google(audio_data)\n", + " print(\"You said:\", text)\n", + " return text" + ] + }, + { + "cell_type": "markdown", + "id": "dcfb0190", + "metadata": {}, + "source": [ + "### LLM & API set-up" + ] + }, + { + "cell_type": "markdown", + "id": "59416453", + "metadata": {}, + "source": [ + "##### Load API keys from .env" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "b638b822", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenAI API Key exists and begins sk-proj-\n", + "Anthropic API Key exists and begins sk-ant-\n", + "Google API Key not set\n" + ] + } + ], + "source": [ + "# Load environment variables in a file called .env\n", + "# Print the key prefixes to help with any debugging\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "if anthropic_api_key:\n", + " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", + "else:\n", + " print(\"Anthropic API Key not 
set\")\n", + "\n", + "if google_api_key:\n", + " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n", + "else:\n", + " print(\"Google API Key not set\")" + ] + }, + { + "cell_type": "markdown", + "id": "9e6ae162", + "metadata": {}, + "source": [ + "### Class for handling API calls and routing requests to the selected models" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "268ea65d", + "metadata": {}, + "outputs": [], + "source": [ + "class LLMHandler:\n", + " def __init__(self, system_message: str = '', ollama_api:str='http://localhost:11434/api/chat'):\n", + " # Default system message if none provided\n", + " self.system_message = system_message if system_message else \"You are a helpful assistant. Always reply in Markdown\"\n", + " self.message_history = []\n", + "\n", + " # Initialize LLM clients\n", + " self.openai = OpenAI()\n", + " self.claude = anthropic.Anthropic()\n", + " self.OLLAMA_API = ollama_api\n", + " self.OLLAMA_HEADERS = {\"Content-Type\": \"application/json\"}\n", + "\n", + " def llm_call(self, model: str = 'gpt-4o-mini', prompt: str = '', memory_persistence=True):\n", + " if not model:\n", + " return 'No model specified'\n", + "\n", + " # Use full message template with system prompt if no prior history\n", + " message = self.get_message_template(prompt, initial=True) if (\n", + " not self.message_history and not 'claude' in model\n", + " ) else self.get_message_template(prompt)\n", + "\n", + " # Handle memory persistence\n", + " if memory_persistence:\n", + " self.message_history.extend(message)\n", + " else:\n", + " self.message_history = message\n", + "\n", + " # Model-specific dispatch\n", + " try:\n", + " if 'gpt' in model:\n", + " response = self.call_openai(model=model)\n", + " elif 'claude' in model:\n", + " response = self.call_claude(model=model)\n", + " elif 'llama' in model:\n", + " response = self.call_ollama(model=model)\n", + " else:\n", + " response = f'{model.title()} is not supported or not a valid model name.'\n", + " except Exception as e:\n", + " response = f'Failed to retrieve response. 
Reason: {e}'\n", + "\n", + " # Save assistant's reply to history if memory is enabled\n", + " if memory_persistence:\n", + " self.message_history.append({\n", + " \"role\": \"assistant\",\n", + " \"content\": response\n", + " })\n", + "\n", + " return response\n", + "\n", + " def get_message_template(self, prompt: str = '', initial=False):\n", + " # Returns a message template with or without system prompt\n", + " initial_template = [\n", + " {\"role\": \"system\", \"content\": self.system_message},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " general_template = [\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " return initial_template if initial else general_template\n", + "\n", + " def call_openai(self, model: str = 'gpt-4o-mini'):\n", + " # Sends chat completion request to OpenAI API\n", + " completion = self.openai.chat.completions.create(\n", + " model=model,\n", + " messages=self.message_history,\n", + " )\n", + " response = completion.choices[0].message.content\n", + " return response\n", + "\n", + " def call_ollama(self, model: str = \"llama3.2\"):\n", + "\n", + " payload = {\n", + " \"model\": model,\n", + " \"messages\": self.message_history,\n", + " \"stream\": False\n", + " }\n", + "\n", + " response = requests.post(url=self.OLLAMA_API, headers=self.OLLAMA_HEADERS, json=payload)\n", + " return response.json()[\"message\"][\"content\"]\n", + "\n", + " def call_claude(self, model: str = \"claude-3-haiku-20240307\"):\n", + " # Sends chat request to Anthropic Claude API\n", + " message = self.claude.messages.create(\n", + " model=model,\n", + " system=self.system_message,\n", + " messages=self.message_history,\n", + " max_tokens=500\n", + " )\n", + " response = message.content[0].text\n", + " return response\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "632e618b", + "metadata": {}, + "outputs": [], + "source": [ + "llm_handler = LLMHandler()\n", + "\n", + "# Function to handle user prompts received by the interface\n", + "def llm_call(model, prompt, memory_persistence):\n", + " response = llm_handler.llm_call(model=model, prompt=prompt, memory_persistence=memory_persistence)\n", + " return response, ''\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e19228f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Specify available model names for the dropdown component\n", + "AVAILABLE_MODELS = [\"gpt-4\", \"gpt-3.5\", \"claude-3-haiku-20240307\", \"llama3.2\", \"gpt-4o-mini\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "f65f43ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7868\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "with gr.Blocks() as demo:\n", + " state = gr.State(False) # Recording state (on/off)\n", + " with gr.Row():\n", + " \n", + " with gr.Column():\n", + " out = gr.Markdown(label='Message history')\n", + " with gr.Row():\n", + " memory = gr.Checkbox(label='Toggle memory', value=True) # Handle memory status (on/off) btn\n", + " model_choice = gr.Dropdown(label='Model', choices=AVAILABLE_MODELS, interactive=True) # Model selection dropdown\n", + " query_box = gr.Textbox(label='ChatBox', placeholder=\"Your message\")\n", + " record_btn = gr.Button(value='Record voice message') # Start/stop recording btn\n", + " send_btn = gr.Button(\"Send\") # Send prompt btn\n", + " \n", + " \n", + " \n", + " record_btn.click(fn=toggle_recording, inputs=state, outputs=[record_btn, query_box, state])\n", + " send_btn.click(fn=llm_call, inputs=[model_choice, query_box, memory], outputs=[out, query_box])\n", + " \n", + "\n", + "demo.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3743db5d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "general_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 0fca56ffa5c3f9c60c0aad5801ee677678fd4ca5 Mon Sep 17 00:00:00 2001 From: Kunmeer-SyedMohamedHyder Date: Thu, 10 Jul 2025 00:58:18 +0530 Subject: [PATCH 16/25] FlightAI --- .../FlightAI-exercise.ipynb | 654 ++++++++++++++++++ 1 file changed, 654 insertions(+) create mode 100644 week2/community-contributions/FlightAI-exercise.ipynb diff --git a/week2/community-contributions/FlightAI-exercise.ipynb b/week2/community-contributions/FlightAI-exercise.ipynb new file mode 100644 index 0000000..f6c96ca --- /dev/null +++ b/week2/community-contributions/FlightAI-exercise.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd", + "metadata": {}, + "source": [ + "# Additional End of week Exercise - week 2\n", + "\n", + "Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n", + "\n", + "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n", + "\n", + "If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n", + "\n", + "I will publish a full solution here soon - unless someone beats me to it...\n", + "\n", + "There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a07e7793-b8f5-44f4-aded-5562f633271a", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "\n", + "import os\n", + "import json\n", + "import base64\n", + "import logging\n", + "import gradio as gr\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Audio, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e879f6ae-b246-479d-8f81-94e47a9072ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialization\n", + "logging.basicConfig(level=logging.INFO)\n", + "load_dotenv(override=True)\n", + "\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if openai_api_key:\n", + " logging.info(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " logging.error(\"OpenAI API Key not set\")\n", + " \n", + "MODEL = \"gpt-4o-mini\"\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4455169-9e5e-4171-92e8-6f850a06f6e3", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = (\n", + " \"You are a helpful assistant for an airline called FlightAI. \"\n", + " \"Always respond in a short, courteous sentence. \"\n", + " \"Provide accurate information only. \"\n", + " \"If you don’t know something, say so clearly. \"\n", + " \"Before booking a ticket, strictly follow this order: \"\n", + " \"1) Check if the destination is available, \"\n", + " \"2) Then check the ticket price, \"\n", + " \"3) Collect all neccessary details like name, destination and date of journey, \"\n", + " \"4) Only then proceed with the booking. \"\n", + " \"Always use the appropriate tools or APIs for each step before confirming a booking.\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bab8e2c-e2b1-4421-a95b-7f1251670817", + "metadata": {}, + "outputs": [], + "source": [ + "# Dummy funcs that mimic the ticket booking behaviour\n", + "# Replace these will real funcs (that call APIs or make DB transactions) to actually book a ticket\n", + "\n", + "ticket_prices = {\n", + " \"london\": \"$799\",\n", + " \"paris\": \"$899\",\n", + " \"tokyo\": \"$1400\",\n", + " \"berlin\": \"$499\"\n", + "}\n", + "\n", + "def check_destination_availability(destination: str) -> dict:\n", + " \"\"\"\n", + " Check if the given destination is available in our ticketing system.\n", + " \n", + " Args:\n", + " destination (str): The name of the city.\n", + " \n", + " Returns:\n", + " dict: {\"available\": bool}\n", + " \"\"\"\n", + " logging.info(f\"Checking availability for destination: {destination}\")\n", + " \n", + " available = destination.lower() in ticket_prices\n", + " return {\"available\": available}\n", + "\n", + "\n", + "def fetch_ticket_price(destination_city: str) -> dict:\n", + " \"\"\"\n", + " Retrieve the ticket price for a given city.\n", + " \n", + " Args:\n", + " destination_city (str): The name of the destination city.\n", + " \n", + " Returns:\n", + " dict: {\"price\": str} or {\"price\": \"Unknown\"} if not found\n", + " \"\"\"\n", + " logging.info(f\"Retrieving price for destination: {destination_city}\")\n", + " \n", + " city = destination_city.lower()\n", + " price = ticket_prices.get(city, \"Unknown\")\n", + " \n", + " return {\"price\": price}\n", + "\n", + "\n", + "def book_ticket(name: str, destination_city: str, journey_date: str) -> dict:\n", + " \"\"\"\n", + " Book a 
ticket to a destination city for a given user and date.\n", + " \n", + " Args:\n", + " name (str): Name of the passenger.\n", + " destination_city (str): Destination city.\n", + " journey_date (str): Date of journey in YYYY-MM-DD format.\n", + " \n", + " Returns:\n", + " dict: Booking confirmation with name, city, price, and date, or error.\n", + " \"\"\"\n", + " logging.info(f\"Booking ticket for {name} to {destination_city} on {journey_date}\")\n", + " \n", + " city = destination_city.lower()\n", + "\n", + " if city not in ticket_prices:\n", + " logging.error(f\"City '{destination_city}' not found in ticket list.\")\n", + " return {\"error\": \"Destination not found.\"}\n", + "\n", + " price_info = fetch_ticket_price(destination_city)\n", + " \n", + " return {\n", + " \"name\": name,\n", + " \"destination_city\": destination_city.title(),\n", + " \"journey_date\": journey_date,\n", + " \"price\": price_info[\"price\"]\n", + " }\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400f4592-2326-43f6-a921-fcd051c4f022", + "metadata": {}, + "outputs": [], + "source": [ + "destination_availability_tool = {\n", + " \"name\": \"check_destination_availability\",\n", + " \"description\": \"Check if tickets are available for the given destination city before proceeding with any booking or pricing inquiry.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The name of the destination city to check for availability.\"\n", + " }\n", + " },\n", + " \"required\": [\"destination\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "ticket_price_tool = {\n", + " \"name\": \"fetch_ticket_price\",\n", + " \"description\": (\n", + " \"Get the price of a return ticket to the specified destination city. \"\n", + " \"Use this after confirming that the destination is available, especially when the customer asks for the ticket price.\"\n", + " ),\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city for which the customer wants the ticket price.\"\n", + " }\n", + " },\n", + " \"required\": [\"destination_city\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "ticket_booking_tool = {\n", + " \"name\": \"book_ticket\",\n", + " \"description\": (\n", + " \"Book a ticket for the customer to the specified destination city on the given journey date. 
\"\n", + " \"Use only after availability and price have been checked.\"\n", + " ),\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"name\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Full name of the person booking the ticket.\"\n", + " },\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city that the customer wants to travel to.\"\n", + " },\n", + " \"journey_date\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date\",\n", + " \"description\": \"The journey date in YYYY-MM-DD format.\"\n", + " }\n", + " },\n", + " \"required\": [\"name\", \"destination_city\", \"journey_date\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "tools = [\n", + " {\"type\": \"function\", \"function\": destination_availability_tool},\n", + " {\"type\": \"function\", \"function\": ticket_price_tool},\n", + " {\"type\": \"function\", \"function\": ticket_booking_tool},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02c17ba-14f2-41c4-b6a2-d1397405d368", + "metadata": {}, + "outputs": [], + "source": [ + "def handle_tool_call(message):\n", + " \"\"\"\n", + " Handles a single OpenAI tool call message and returns both the result\n", + " and a formatted tool response dictionary.\n", + " \n", + " Args:\n", + " message (object): An OpenAI message containing a tool call.\n", + " \n", + " Returns:\n", + " tuple: (result_dict, response_dict)\n", + " \"\"\"\n", + " tool_call = message.tool_calls[0]\n", + " function_name = tool_call.function.name\n", + " arguments = json.loads(tool_call.function.arguments)\n", + "\n", + " result = None\n", + "\n", + " logging.info(f\"Tool call received: {function_name} with arguments: {arguments}\")\n", + "\n", + " if function_name == \"check_destination_availability\":\n", + " result = check_destination_availability(**arguments)\n", + "\n", + " elif function_name == \"fetch_ticket_price\":\n", + " city = arguments.get(\"destination_city\")\n", + " price_info = fetch_ticket_price(city)\n", + " result = {\"destination_city\": city, \"price\": price_info[\"price\"]}\n", + "\n", + " elif function_name == \"book_ticket\":\n", + " result = book_ticket(**arguments)\n", + "\n", + " else:\n", + " logging.warning(\"Unrecognized tool function: %s\", function_name)\n", + " result = {\"error\": f\"Unknown function '{function_name}'\"}\n", + "\n", + " response = {\n", + " \"role\": \"tool\",\n", + " \"tool_call_id\": tool_call.id,\n", + " \"content\": json.dumps(result)\n", + " }\n", + "\n", + " return result, response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72c1a9e7-186c-4218-9edc-01814baec431", + "metadata": {}, + "outputs": [], + "source": [ + "def artist(city: str, style: str = \"vibrant pop-art\", size: str = \"1024x1024\") -> Image.Image:\n", + " \"\"\"\n", + " Generates a city-themed vacation image using DALL·E.\n", + "\n", + " Args:\n", + " city (str): Name of the city to visualize.\n", + " style (str): Artistic style for the image prompt.\n", + " size (str): Image resolution (e.g., \"1024x1024\").\n", + "\n", + " Returns:\n", + " Image.Image: A PIL Image object representing the generated image.\n", + "\n", + " Raises:\n", + " ValueError: If city name is empty.\n", + " RuntimeError: If image generation fails.\n", + " \"\"\"\n", + " if not city.strip():\n", + " raise ValueError(\"City name cannot be empty.\")\n", + "\n", + " prompt = (\n", + " f\"An image representing a vacation in {city}, 
\"\n", + " f\"showing iconic tourist attractions, cultural elements, and everything unique about {city}, \"\n", + " f\"rendered in a {style} style.\"\n", + " )\n", + "\n", + " logging.info(\"Generating image for city: %s with style: %s\", city, style)\n", + "\n", + " try:\n", + " response = openai.images.generate(\n", + " model=\"dall-e-3\",\n", + " prompt=prompt,\n", + " size=size,\n", + " n=1,\n", + " response_format=\"b64_json\",\n", + " )\n", + "\n", + " image_base64 = response.data[0].b64_json\n", + " image_data = base64.b64decode(image_base64)\n", + " logging.info(\"Image generation successful for %s\", city)\n", + "\n", + " return Image.open(BytesIO(image_data))\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Failed to generate image for city '%s': %s\", city, str(e))\n", + " raise RuntimeError(f\"Image generation failed for city '{city}'\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf7c091-6c68-4af6-8197-c1456b36cedf", + "metadata": {}, + "outputs": [], + "source": [ + "def talker(message: str, output_filename: str = \"output_audio.mp3\", autoplay: bool = True) -> None:\n", + " \"\"\"\n", + " Converts a text message into speech using OpenAI TTS and plays the audio.\n", + "\n", + " Args:\n", + " message (str): The text to convert to speech.\n", + " output_filename (str): The filename to save the generated audio.\n", + " autoplay (bool): Whether to autoplay the audio in the notebook.\n", + "\n", + " Raises:\n", + " ValueError: If the message is empty.\n", + " RuntimeError: If the audio generation fails.\n", + " \"\"\"\n", + " if not message.strip():\n", + " raise ValueError(\"Message cannot be empty.\")\n", + "\n", + " logging.info(\"Generating speech for message: %s\", message)\n", + "\n", + " try:\n", + " response = openai.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"alloy\",\n", + " input=message\n", + " )\n", + "\n", + " with open(output_filename, \"wb\") as f:\n", + " f.write(response.content)\n", + "\n", + " logging.info(\"Audio written to: %s\", output_filename)\n", + "\n", + " if autoplay:\n", + " display(Audio(output_filename, autoplay=True))\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Failed to generate or play audio: %s\", str(e))\n", + " raise RuntimeError(\"Text-to-speech generation failed.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54568b4a-be8d-47a1-b924-03acdafef70e", + "metadata": {}, + "outputs": [], + "source": [ + "def translate(message, language):\n", + " \"\"\"\n", + " Translates the given text into the specified language using OpenAI Chat API.\n", + "\n", + " Args:\n", + " message (str): The text to be translated.\n", + " language (str): Target language for translation (e.g., 'French', 'Japanese').\n", + "\n", + " Returns:\n", + " str: Translated text.\n", + "\n", + " Raises:\n", + " ValueError: If input message or language is empty.\n", + " RuntimeError: If translation fails due to API or other issues.\n", + " \"\"\"\n", + " if not message.strip():\n", + " raise ValueError(\"Input message cannot be empty.\")\n", + " if not language.strip():\n", + " raise ValueError(\"Target language cannot be empty.\")\n", + "\n", + " logging.info(\"Translating to %s: %s\", language, message)\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": f\"You are a translation assistant. 
Translate everything the user says to {language}.\"},\n", + " {\"role\": \"user\", \"content\": message}\n", + " ]\n", + "\n", + " try:\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=messages\n", + " )\n", + " translated = response.choices[0].message.content.strip()\n", + " logging.info(\"Translation successful.\")\n", + " return translated\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Translation failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to translate message.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e6cf470-8ea0-43b2-bbcc-53c2432feb0d", + "metadata": {}, + "outputs": [], + "source": [ + "def transcribe_audio(audio_path):\n", + " \"\"\"\n", + " Transcribes an audio file using OpenAI's Whisper model.\n", + "\n", + " Args:\n", + " audio_path (str): Path to the audio file (e.g., .mp3, .wav).\n", + " model (str): OpenAI model for transcription (default: 'whisper-1').\n", + "\n", + " Returns:\n", + " str: Transcribed text from the audio file.\n", + "\n", + " Raises:\n", + " ValueError: If the path is invalid or the file does not exist.\n", + " RuntimeError: If the transcription fails.\n", + " \"\"\"\n", + " if not audio_path or not os.path.exists(audio_path):\n", + " raise ValueError(\"Invalid or missing audio file path.\")\n", + "\n", + " logging.info(\"Transcribing audio file: %s using model: whisper-1\", audio_path)\n", + "\n", + " try:\n", + " with open(audio_path, \"rb\") as f:\n", + " response = openai.audio.transcriptions.create(\n", + " model=\"whisper-1\",\n", + " file=f\n", + " )\n", + " transcript = response.text.strip()\n", + " logging.info(\"Transcription successful.\")\n", + " return transcript\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Transcription failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to transcribe audio.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3489656e-0f08-4d41-94b1-d902c93ca164", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(history: list, language: str, translated_history: list, speaking_language: str) -> tuple:\n", + " \"\"\"\n", + " Handles a chat interaction including tool calls, image generation, translation, and TTS playback.\n", + "\n", + " Args:\n", + " history (list): List of previous conversation messages.\n", + " language (str): Target language for translation and TTS.\n", + "\n", + " Returns:\n", + " tuple: (updated history list, generated image if any, translated response string)\n", + " \"\"\"\n", + " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n", + " image = None\n", + "\n", + " try:\n", + " # Initial assistant response\n", + " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n", + " choice = response.choices[0]\n", + "\n", + " # Handle tool calls if triggered\n", + " if choice.finish_reason == \"tool_calls\":\n", + " message = choice.message\n", + " result, tool_response = handle_tool_call(message)\n", + "\n", + " # Append tool-related messages\n", + " messages.append(message)\n", + " messages.append(tool_response)\n", + " logging.info(\"Tool call result: %s\", result)\n", + "\n", + " # Generate image if a booking was completed\n", + " if message.tool_calls[0].function.name == \"book_ticket\" and \"destination_city\" in result:\n", + " image = artist(result[\"destination_city\"])\n", + "\n", + " # Get final assistant response after tool execution\n", + " response = 
openai.chat.completions.create(model=MODEL, messages=messages)\n", + " choice = response.choices[0]\n", + "\n", + " reply = choice.message.content.strip()\n", + " history.append({\"role\": \"assistant\", \"content\": reply})\n", + "\n", + " # Translate and speak the reply\n", + " translated_reply = translate(reply, language)\n", + " translated_history.append({\"role\": \"assistant\", \"content\": translated_reply})\n", + "\n", + " if speaking_language == \"English\":\n", + " talker(reply)\n", + " else:\n", + " talker(translated_reply)\n", + "\n", + " return history, image, translated_history\n", + "\n", + " except Exception as e:\n", + " logging.error(\"Chat processing failed: %s\", str(e))\n", + " raise RuntimeError(\"Failed to complete chat interaction.\") from e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f76acc68-726e-457f-88ab-99da75debde5", + "metadata": {}, + "outputs": [], + "source": [ + "force_dark_mode = \"\"\"\n", + "function refresh() {\n", + " const url = new URL(window.location);\n", + " if (url.searchParams.get('__theme') !== 'dark') {\n", + " url.searchParams.set('__theme', 'dark');\n", + " window.location.href = url.href;\n", + " }\n", + "}\n", + "\"\"\"\n", + "\n", + "with gr.Blocks(js=force_dark_mode) as ui:\n", + " with gr.Row():\n", + " gr.Markdown(\"### FlightAI Chat with Translation\")\n", + "\n", + " with gr.Row():\n", + " lang_dropdown = gr.Dropdown(\n", + " choices=[\"Spanish\", \"French\", \"German\", \"Japanese\", \"Hindi\"],\n", + " value=\"Spanish\",\n", + " label=\"Translate To\"\n", + " )\n", + " \n", + " speak_dropdown = gr.Dropdown(\n", + " choices=[\"English\", \"Selected Language\"],\n", + " value=\"English\",\n", + " label=\"Speak out in\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Chat History\")\n", + " translated_chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Translated Chat\")\n", + " image_output = gr.Image(height=500)\n", + "\n", + " with gr.Row():\n", + " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n", + " audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Or speak to the assistant\")\n", + "\n", + " with gr.Row():\n", + " clear = gr.Button(\"Clear\")\n", + "\n", + " def do_entry(message, history, audio, translated_history, language):\n", + " if audio:\n", + " message = transcribe_audio(audio)\n", + "\n", + " if message:\n", + " history += [{\"role\": \"user\", \"content\": message}]\n", + " translated_history += [{\"role\": \"user\", \"content\": translate(message, language)}]\n", + " return \"\", history, None, translated_history\n", + "\n", + " entry.submit(\n", + " do_entry,\n", + " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", + " outputs=[entry, chatbot, audio_input, translated_chatbot]\n", + " ).then(\n", + " chat,\n", + " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", + " outputs=[chatbot, image_output, translated_chatbot]\n", + " )\n", + "\n", + " audio_input.change(\n", + " do_entry,\n", + " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", + " outputs=[entry, chatbot, audio_input, translated_chatbot]\n", + " ).then(\n", + " chat,\n", + " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", + " outputs=[chatbot, image_output, translated_chatbot]\n", + " )\n", + "\n", + " clear.click(lambda: [\"\", [], None, [], None], inputs=None, outputs=[entry, chatbot, audio_input, translated_chatbot, 
image_output], queue=False)\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f97435-fa0d-45f7-b02f-4ac5f4901c53", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 72f661563e856272bbbab39e18fca860a74b7c8a Mon Sep 17 00:00:00 2001 From: SyedHyder2308 <114393935+Kunmeer-SyedMohamedHyder@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:02:53 +0530 Subject: [PATCH 17/25] Revert --- week2/week2 EXERCISE.ipynb | 605 +------------------------------------ 1 file changed, 1 insertion(+), 604 deletions(-) diff --git a/week2/week2 EXERCISE.ipynb b/week2/week2 EXERCISE.ipynb index f6c96ca..d97f5cb 100644 --- a/week2/week2 EXERCISE.ipynb +++ b/week2/week2 EXERCISE.ipynb @@ -24,609 +24,6 @@ "id": "a07e7793-b8f5-44f4-aded-5562f633271a", "metadata": {}, "outputs": [], - "source": [ - "# Imports\n", - "\n", - "import os\n", - "import json\n", - "import base64\n", - "import logging\n", - "import gradio as gr\n", - "from PIL import Image\n", - "from io import BytesIO\n", - "from openai import OpenAI\n", - "from dotenv import load_dotenv\n", - "from IPython.display import Audio, display" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e879f6ae-b246-479d-8f81-94e47a9072ec", - "metadata": {}, - "outputs": [], - "source": [ - "# Initialization\n", - "logging.basicConfig(level=logging.INFO)\n", - "load_dotenv(override=True)\n", - "\n", - "openai_api_key = os.getenv('OPENAI_API_KEY')\n", - "if openai_api_key:\n", - " logging.info(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", - "else:\n", - " logging.error(\"OpenAI API Key not set\")\n", - " \n", - "MODEL = \"gpt-4o-mini\"\n", - "openai = OpenAI()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4455169-9e5e-4171-92e8-6f850a06f6e3", - "metadata": {}, - "outputs": [], - "source": [ - "system_message = (\n", - " \"You are a helpful assistant for an airline called FlightAI. \"\n", - " \"Always respond in a short, courteous sentence. \"\n", - " \"Provide accurate information only. \"\n", - " \"If you don’t know something, say so clearly. \"\n", - " \"Before booking a ticket, strictly follow this order: \"\n", - " \"1) Check if the destination is available, \"\n", - " \"2) Then check the ticket price, \"\n", - " \"3) Collect all neccessary details like name, destination and date of journey, \"\n", - " \"4) Only then proceed with the booking. 
\"\n", - " \"Always use the appropriate tools or APIs for each step before confirming a booking.\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bab8e2c-e2b1-4421-a95b-7f1251670817", - "metadata": {}, - "outputs": [], - "source": [ - "# Dummy funcs that mimic the ticket booking behaviour\n", - "# Replace these will real funcs (that call APIs or make DB transactions) to actually book a ticket\n", - "\n", - "ticket_prices = {\n", - " \"london\": \"$799\",\n", - " \"paris\": \"$899\",\n", - " \"tokyo\": \"$1400\",\n", - " \"berlin\": \"$499\"\n", - "}\n", - "\n", - "def check_destination_availability(destination: str) -> dict:\n", - " \"\"\"\n", - " Check if the given destination is available in our ticketing system.\n", - " \n", - " Args:\n", - " destination (str): The name of the city.\n", - " \n", - " Returns:\n", - " dict: {\"available\": bool}\n", - " \"\"\"\n", - " logging.info(f\"Checking availability for destination: {destination}\")\n", - " \n", - " available = destination.lower() in ticket_prices\n", - " return {\"available\": available}\n", - "\n", - "\n", - "def fetch_ticket_price(destination_city: str) -> dict:\n", - " \"\"\"\n", - " Retrieve the ticket price for a given city.\n", - " \n", - " Args:\n", - " destination_city (str): The name of the destination city.\n", - " \n", - " Returns:\n", - " dict: {\"price\": str} or {\"price\": \"Unknown\"} if not found\n", - " \"\"\"\n", - " logging.info(f\"Retrieving price for destination: {destination_city}\")\n", - " \n", - " city = destination_city.lower()\n", - " price = ticket_prices.get(city, \"Unknown\")\n", - " \n", - " return {\"price\": price}\n", - "\n", - "\n", - "def book_ticket(name: str, destination_city: str, journey_date: str) -> dict:\n", - " \"\"\"\n", - " Book a ticket to a destination city for a given user and date.\n", - " \n", - " Args:\n", - " name (str): Name of the passenger.\n", - " destination_city (str): Destination city.\n", - " journey_date (str): Date of journey in YYYY-MM-DD format.\n", - " \n", - " Returns:\n", - " dict: Booking confirmation with name, city, price, and date, or error.\n", - " \"\"\"\n", - " logging.info(f\"Booking ticket for {name} to {destination_city} on {journey_date}\")\n", - " \n", - " city = destination_city.lower()\n", - "\n", - " if city not in ticket_prices:\n", - " logging.error(f\"City '{destination_city}' not found in ticket list.\")\n", - " return {\"error\": \"Destination not found.\"}\n", - "\n", - " price_info = fetch_ticket_price(destination_city)\n", - " \n", - " return {\n", - " \"name\": name,\n", - " \"destination_city\": destination_city.title(),\n", - " \"journey_date\": journey_date,\n", - " \"price\": price_info[\"price\"]\n", - " }\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "400f4592-2326-43f6-a921-fcd051c4f022", - "metadata": {}, - "outputs": [], - "source": [ - "destination_availability_tool = {\n", - " \"name\": \"check_destination_availability\",\n", - " \"description\": \"Check if tickets are available for the given destination city before proceeding with any booking or pricing inquiry.\",\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"destination\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The name of the destination city to check for availability.\"\n", - " }\n", - " },\n", - " \"required\": [\"destination\"],\n", - " \"additionalProperties\": False\n", - " }\n", - "}\n", - "\n", - "ticket_price_tool = {\n", - " \"name\": 
\"fetch_ticket_price\",\n", - " \"description\": (\n", - " \"Get the price of a return ticket to the specified destination city. \"\n", - " \"Use this after confirming that the destination is available, especially when the customer asks for the ticket price.\"\n", - " ),\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"destination_city\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The city for which the customer wants the ticket price.\"\n", - " }\n", - " },\n", - " \"required\": [\"destination_city\"],\n", - " \"additionalProperties\": False\n", - " }\n", - "}\n", - "\n", - "ticket_booking_tool = {\n", - " \"name\": \"book_ticket\",\n", - " \"description\": (\n", - " \"Book a ticket for the customer to the specified destination city on the given journey date. \"\n", - " \"Use only after availability and price have been checked.\"\n", - " ),\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"name\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"Full name of the person booking the ticket.\"\n", - " },\n", - " \"destination_city\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"The city that the customer wants to travel to.\"\n", - " },\n", - " \"journey_date\": {\n", - " \"type\": \"string\",\n", - " \"format\": \"date\",\n", - " \"description\": \"The journey date in YYYY-MM-DD format.\"\n", - " }\n", - " },\n", - " \"required\": [\"name\", \"destination_city\", \"journey_date\"],\n", - " \"additionalProperties\": False\n", - " }\n", - "}\n", - "\n", - "tools = [\n", - " {\"type\": \"function\", \"function\": destination_availability_tool},\n", - " {\"type\": \"function\", \"function\": ticket_price_tool},\n", - " {\"type\": \"function\", \"function\": ticket_booking_tool},\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f02c17ba-14f2-41c4-b6a2-d1397405d368", - "metadata": {}, - "outputs": [], - "source": [ - "def handle_tool_call(message):\n", - " \"\"\"\n", - " Handles a single OpenAI tool call message and returns both the result\n", - " and a formatted tool response dictionary.\n", - " \n", - " Args:\n", - " message (object): An OpenAI message containing a tool call.\n", - " \n", - " Returns:\n", - " tuple: (result_dict, response_dict)\n", - " \"\"\"\n", - " tool_call = message.tool_calls[0]\n", - " function_name = tool_call.function.name\n", - " arguments = json.loads(tool_call.function.arguments)\n", - "\n", - " result = None\n", - "\n", - " logging.info(f\"Tool call received: {function_name} with arguments: {arguments}\")\n", - "\n", - " if function_name == \"check_destination_availability\":\n", - " result = check_destination_availability(**arguments)\n", - "\n", - " elif function_name == \"fetch_ticket_price\":\n", - " city = arguments.get(\"destination_city\")\n", - " price_info = fetch_ticket_price(city)\n", - " result = {\"destination_city\": city, \"price\": price_info[\"price\"]}\n", - "\n", - " elif function_name == \"book_ticket\":\n", - " result = book_ticket(**arguments)\n", - "\n", - " else:\n", - " logging.warning(\"Unrecognized tool function: %s\", function_name)\n", - " result = {\"error\": f\"Unknown function '{function_name}'\"}\n", - "\n", - " response = {\n", - " \"role\": \"tool\",\n", - " \"tool_call_id\": tool_call.id,\n", - " \"content\": json.dumps(result)\n", - " }\n", - "\n", - " return result, response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"72c1a9e7-186c-4218-9edc-01814baec431", - "metadata": {}, - "outputs": [], - "source": [ - "def artist(city: str, style: str = \"vibrant pop-art\", size: str = \"1024x1024\") -> Image.Image:\n", - " \"\"\"\n", - " Generates a city-themed vacation image using DALL·E.\n", - "\n", - " Args:\n", - " city (str): Name of the city to visualize.\n", - " style (str): Artistic style for the image prompt.\n", - " size (str): Image resolution (e.g., \"1024x1024\").\n", - "\n", - " Returns:\n", - " Image.Image: A PIL Image object representing the generated image.\n", - "\n", - " Raises:\n", - " ValueError: If city name is empty.\n", - " RuntimeError: If image generation fails.\n", - " \"\"\"\n", - " if not city.strip():\n", - " raise ValueError(\"City name cannot be empty.\")\n", - "\n", - " prompt = (\n", - " f\"An image representing a vacation in {city}, \"\n", - " f\"showing iconic tourist attractions, cultural elements, and everything unique about {city}, \"\n", - " f\"rendered in a {style} style.\"\n", - " )\n", - "\n", - " logging.info(\"Generating image for city: %s with style: %s\", city, style)\n", - "\n", - " try:\n", - " response = openai.images.generate(\n", - " model=\"dall-e-3\",\n", - " prompt=prompt,\n", - " size=size,\n", - " n=1,\n", - " response_format=\"b64_json\",\n", - " )\n", - "\n", - " image_base64 = response.data[0].b64_json\n", - " image_data = base64.b64decode(image_base64)\n", - " logging.info(\"Image generation successful for %s\", city)\n", - "\n", - " return Image.open(BytesIO(image_data))\n", - "\n", - " except Exception as e:\n", - " logging.error(\"Failed to generate image for city '%s': %s\", city, str(e))\n", - " raise RuntimeError(f\"Image generation failed for city '{city}'\") from e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdf7c091-6c68-4af6-8197-c1456b36cedf", - "metadata": {}, - "outputs": [], - "source": [ - "def talker(message: str, output_filename: str = \"output_audio.mp3\", autoplay: bool = True) -> None:\n", - " \"\"\"\n", - " Converts a text message into speech using OpenAI TTS and plays the audio.\n", - "\n", - " Args:\n", - " message (str): The text to convert to speech.\n", - " output_filename (str): The filename to save the generated audio.\n", - " autoplay (bool): Whether to autoplay the audio in the notebook.\n", - "\n", - " Raises:\n", - " ValueError: If the message is empty.\n", - " RuntimeError: If the audio generation fails.\n", - " \"\"\"\n", - " if not message.strip():\n", - " raise ValueError(\"Message cannot be empty.\")\n", - "\n", - " logging.info(\"Generating speech for message: %s\", message)\n", - "\n", - " try:\n", - " response = openai.audio.speech.create(\n", - " model=\"tts-1\",\n", - " voice=\"alloy\",\n", - " input=message\n", - " )\n", - "\n", - " with open(output_filename, \"wb\") as f:\n", - " f.write(response.content)\n", - "\n", - " logging.info(\"Audio written to: %s\", output_filename)\n", - "\n", - " if autoplay:\n", - " display(Audio(output_filename, autoplay=True))\n", - "\n", - " except Exception as e:\n", - " logging.error(\"Failed to generate or play audio: %s\", str(e))\n", - " raise RuntimeError(\"Text-to-speech generation failed.\") from e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "54568b4a-be8d-47a1-b924-03acdafef70e", - "metadata": {}, - "outputs": [], - "source": [ - "def translate(message, language):\n", - " \"\"\"\n", - " Translates the given text into the specified language using OpenAI Chat API.\n", - "\n", - " Args:\n", - " message (str): The 
text to be translated.\n", - " language (str): Target language for translation (e.g., 'French', 'Japanese').\n", - "\n", - " Returns:\n", - " str: Translated text.\n", - "\n", - " Raises:\n", - " ValueError: If input message or language is empty.\n", - " RuntimeError: If translation fails due to API or other issues.\n", - " \"\"\"\n", - " if not message.strip():\n", - " raise ValueError(\"Input message cannot be empty.\")\n", - " if not language.strip():\n", - " raise ValueError(\"Target language cannot be empty.\")\n", - "\n", - " logging.info(\"Translating to %s: %s\", language, message)\n", - "\n", - " messages = [\n", - " {\"role\": \"system\", \"content\": f\"You are a translation assistant. Translate everything the user says to {language}.\"},\n", - " {\"role\": \"user\", \"content\": message}\n", - " ]\n", - "\n", - " try:\n", - " response = openai.chat.completions.create(\n", - " model=MODEL,\n", - " messages=messages\n", - " )\n", - " translated = response.choices[0].message.content.strip()\n", - " logging.info(\"Translation successful.\")\n", - " return translated\n", - "\n", - " except Exception as e:\n", - " logging.error(\"Translation failed: %s\", str(e))\n", - " raise RuntimeError(\"Failed to translate message.\") from e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e6cf470-8ea0-43b2-bbcc-53c2432feb0d", - "metadata": {}, - "outputs": [], - "source": [ - "def transcribe_audio(audio_path):\n", - " \"\"\"\n", - " Transcribes an audio file using OpenAI's Whisper model.\n", - "\n", - " Args:\n", - " audio_path (str): Path to the audio file (e.g., .mp3, .wav).\n", - " model (str): OpenAI model for transcription (default: 'whisper-1').\n", - "\n", - " Returns:\n", - " str: Transcribed text from the audio file.\n", - "\n", - " Raises:\n", - " ValueError: If the path is invalid or the file does not exist.\n", - " RuntimeError: If the transcription fails.\n", - " \"\"\"\n", - " if not audio_path or not os.path.exists(audio_path):\n", - " raise ValueError(\"Invalid or missing audio file path.\")\n", - "\n", - " logging.info(\"Transcribing audio file: %s using model: whisper-1\", audio_path)\n", - "\n", - " try:\n", - " with open(audio_path, \"rb\") as f:\n", - " response = openai.audio.transcriptions.create(\n", - " model=\"whisper-1\",\n", - " file=f\n", - " )\n", - " transcript = response.text.strip()\n", - " logging.info(\"Transcription successful.\")\n", - " return transcript\n", - "\n", - " except Exception as e:\n", - " logging.error(\"Transcription failed: %s\", str(e))\n", - " raise RuntimeError(\"Failed to transcribe audio.\") from e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3489656e-0f08-4d41-94b1-d902c93ca164", - "metadata": {}, - "outputs": [], - "source": [ - "def chat(history: list, language: str, translated_history: list, speaking_language: str) -> tuple:\n", - " \"\"\"\n", - " Handles a chat interaction including tool calls, image generation, translation, and TTS playback.\n", - "\n", - " Args:\n", - " history (list): List of previous conversation messages.\n", - " language (str): Target language for translation and TTS.\n", - "\n", - " Returns:\n", - " tuple: (updated history list, generated image if any, translated response string)\n", - " \"\"\"\n", - " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n", - " image = None\n", - "\n", - " try:\n", - " # Initial assistant response\n", - " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n", - " 
choice = response.choices[0]\n", - "\n", - " # Handle tool calls if triggered\n", - " if choice.finish_reason == \"tool_calls\":\n", - " message = choice.message\n", - " result, tool_response = handle_tool_call(message)\n", - "\n", - " # Append tool-related messages\n", - " messages.append(message)\n", - " messages.append(tool_response)\n", - " logging.info(\"Tool call result: %s\", result)\n", - "\n", - " # Generate image if a booking was completed\n", - " if message.tool_calls[0].function.name == \"book_ticket\" and \"destination_city\" in result:\n", - " image = artist(result[\"destination_city\"])\n", - "\n", - " # Get final assistant response after tool execution\n", - " response = openai.chat.completions.create(model=MODEL, messages=messages)\n", - " choice = response.choices[0]\n", - "\n", - " reply = choice.message.content.strip()\n", - " history.append({\"role\": \"assistant\", \"content\": reply})\n", - "\n", - " # Translate and speak the reply\n", - " translated_reply = translate(reply, language)\n", - " translated_history.append({\"role\": \"assistant\", \"content\": translated_reply})\n", - "\n", - " if speaking_language == \"English\":\n", - " talker(reply)\n", - " else:\n", - " talker(translated_reply)\n", - "\n", - " return history, image, translated_history\n", - "\n", - " except Exception as e:\n", - " logging.error(\"Chat processing failed: %s\", str(e))\n", - " raise RuntimeError(\"Failed to complete chat interaction.\") from e" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f76acc68-726e-457f-88ab-99da75debde5", - "metadata": {}, - "outputs": [], - "source": [ - "force_dark_mode = \"\"\"\n", - "function refresh() {\n", - " const url = new URL(window.location);\n", - " if (url.searchParams.get('__theme') !== 'dark') {\n", - " url.searchParams.set('__theme', 'dark');\n", - " window.location.href = url.href;\n", - " }\n", - "}\n", - "\"\"\"\n", - "\n", - "with gr.Blocks(js=force_dark_mode) as ui:\n", - " with gr.Row():\n", - " gr.Markdown(\"### FlightAI Chat with Translation\")\n", - "\n", - " with gr.Row():\n", - " lang_dropdown = gr.Dropdown(\n", - " choices=[\"Spanish\", \"French\", \"German\", \"Japanese\", \"Hindi\"],\n", - " value=\"Spanish\",\n", - " label=\"Translate To\"\n", - " )\n", - " \n", - " speak_dropdown = gr.Dropdown(\n", - " choices=[\"English\", \"Selected Language\"],\n", - " value=\"English\",\n", - " label=\"Speak out in\"\n", - " )\n", - " \n", - " with gr.Row():\n", - " chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Chat History\")\n", - " translated_chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Translated Chat\")\n", - " image_output = gr.Image(height=500)\n", - "\n", - " with gr.Row():\n", - " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n", - " audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Or speak to the assistant\")\n", - "\n", - " with gr.Row():\n", - " clear = gr.Button(\"Clear\")\n", - "\n", - " def do_entry(message, history, audio, translated_history, language):\n", - " if audio:\n", - " message = transcribe_audio(audio)\n", - "\n", - " if message:\n", - " history += [{\"role\": \"user\", \"content\": message}]\n", - " translated_history += [{\"role\": \"user\", \"content\": translate(message, language)}]\n", - " return \"\", history, None, translated_history\n", - "\n", - " entry.submit(\n", - " do_entry,\n", - " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", - " outputs=[entry, chatbot, audio_input, 
translated_chatbot]\n", - " ).then(\n", - " chat,\n", - " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", - " outputs=[chatbot, image_output, translated_chatbot]\n", - " )\n", - "\n", - " audio_input.change(\n", - " do_entry,\n", - " inputs=[entry, chatbot, audio_input, translated_chatbot, lang_dropdown],\n", - " outputs=[entry, chatbot, audio_input, translated_chatbot]\n", - " ).then(\n", - " chat,\n", - " inputs=[chatbot, lang_dropdown, translated_chatbot, speak_dropdown],\n", - " outputs=[chatbot, image_output, translated_chatbot]\n", - " )\n", - "\n", - " clear.click(lambda: [\"\", [], None, [], None], inputs=None, outputs=[entry, chatbot, audio_input, translated_chatbot, image_output], queue=False)\n", - "\n", - "ui.launch(inbrowser=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58f97435-fa0d-45f7-b02f-4ac5f4901c53", - "metadata": {}, - "outputs": [], "source": [] } ], @@ -646,7 +43,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.11" } }, "nbformat": 4, From 1ed52bfb81e275403a33aca051c241a5fcd0fddf Mon Sep 17 00:00:00 2001 From: gulsahdemiryurek <75502685+gulsahdemiryurek@users.noreply.github.com> Date: Thu, 10 Jul 2025 15:56:40 +0300 Subject: [PATCH 18/25] Add files via upload --- .../day1_check_source_for_security_vuln.ipynb | 156 ++++++++++++++++++ .../xss_vulnerable_example.html | 24 +++ 2 files changed, 180 insertions(+) create mode 100644 week1/community-contributions/day1_check_source_for_security_vuln.ipynb create mode 100644 week1/community-contributions/xss_vulnerable_example.html diff --git a/week1/community-contributions/day1_check_source_for_security_vuln.ipynb b/week1/community-contributions/day1_check_source_for_security_vuln.ipynb new file mode 100644 index 0000000..db99309 --- /dev/null +++ b/week1/community-contributions/day1_check_source_for_security_vuln.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e95fa36b-7118-4fd8-a3b2-b4424bda2178", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0356762-4a3f-437a-908e-192aa9c804c7", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb747863-30bd-4a0b-b359-b37223884075", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "message = \"Hello, GPT! This is my first ever message to you! 
Hi!\"\n", + "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fae60901-3564-4f26-a812-fc16d3b95bdb", + "metadata": {}, + "outputs": [], + "source": [ + "def get_page_source(url):\n", + " response = requests.get(url)\n", + " response.raise_for_status() # Hata varsa bildirir\n", + " return response.text # Ham HTML metni döner\n", + "\n", + "system_prompt = \"You are an assistant analyzing the source of a website and checking for security vulnerabilities.\"\n", + "\n", + "def user_prompt_for(url):\n", + " user_prompt = \"Below is the HTML source of the website:\\n\\n\"\n", + " user_prompt += get_page_source(url) \n", + " user_prompt += \"\\n\\nPlease check this website and search for security vulnerabilities. \"\n", + " user_prompt += \"If you don't find any, print 'No vulnerability found.' \"\n", + " user_prompt += \"If you find a potential vulnerability risk, describe the vulnerability risk and print 'Potential Vulnerability Risk'.\"\n", + " user_prompt += \"If you find a direct, explicit vulnerability, describe the vulnerability and CVSS Score print 'ATTENTION! Vulnerability is Found.'\"\n", + " user_prompt += \"If you find both a potential vulnerability risk and a direct, explicit vulnerability, describe them and CVSS Score print 'ATTENTION! Potential Vulnerability Risk and Direct Vulnerability are Found!!'\"\n", + " return user_prompt\n", + "\n", + "def messages_for(url):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(url)}\n", + " ]\n", + "\n", + "def check_vuln(url):\n", + " response = openai.chat.completions.create(\n", + " model = \"gpt-4o-mini\",\n", + " messages = messages_for(url)\n", + " )\n", + " return response.choices[0].message.content\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e048c27f-f659-4c92-a47c-679bf6e5bf5f", + "metadata": {}, + "outputs": [], + "source": [ + "def display_vuln(url):\n", + " display_vuln = check_vuln(url)\n", + " display(Markdown(display_vuln))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69f5852f-ca5b-4933-b93c-e9f2d401467a", + "metadata": {}, + "outputs": [], + "source": [ + "display_vuln(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "824943fc-e5a5-424a-abec-56767a709782", + "metadata": {}, + "outputs": [], + "source": [ + "display_vuln(\"http://192.168.1.113/\") #local apache server IP, contains xss_vulnerable_example.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3543846-e0c6-4504-8b65-2f675f0f7ebe", + "metadata": {}, + "outputs": [], + "source": [ + "display_vuln(\"https://www.google.com\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/xss_vulnerable_example.html b/week1/community-contributions/xss_vulnerable_example.html new file mode 100644 index 0000000..6e1056c --- /dev/null +++ 
b/week1/community-contributions/xss_vulnerable_example.html
@@ -0,0 +1,24 @@
+XSS Vulnerability Example
+Leave a Comment
+Your Comment:
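
The markup of xss_vulnerable_example.html does not survive in this extract beyond the visible text above. A minimal sketch of the kind of page the notebook is expected to flag (a comment form whose value is written straight into the page with innerHTML and no sanitization) might look like the following; the element IDs, function name, and overall structure are assumptions for illustration, not the committed file:

<!DOCTYPE html>
<html>
<head>
  <title>XSS Vulnerability Example</title>
</head>
<body>
  <h3>Leave a Comment</h3>
  <label for="comment">Your Comment:</label>
  <input type="text" id="comment">
  <button onclick="addComment()">Submit</button>
  <div id="comments"></div>
  <script>
    // Deliberately unsafe: the raw input value is appended via innerHTML,
    // so a payload such as <img src=x onerror=alert(1)> will execute.
    function addComment() {
      var value = document.getElementById("comment").value;
      document.getElementById("comments").innerHTML += value;
    }
  </script>
</body>
</html>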

+ + \ No newline at end of file From 21fe10cc90347132afba9c72f201b5155014679d Mon Sep 17 00:00:00 2001 From: RalphMaa Date: Thu, 10 Jul 2025 13:45:04 -0400 Subject: [PATCH 19/25] Added my contributions to community-contributions --- .../day1_Project.ipynb | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 week1/community-contributions/day1_Project.ipynb diff --git a/week1/community-contributions/day1_Project.ipynb b/week1/community-contributions/day1_Project.ipynb new file mode 100644 index 0000000..30e795c --- /dev/null +++ b/week1/community-contributions/day1_Project.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "181edd2d-67d4-43e4-9a89-327eaff26177", + "metadata": {}, + "source": [ + "Grammar and Vocab AI Checker" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4be465e2-16fc-4b34-a771-d23f05edbc14", + "metadata": {}, + "outputs": [], + "source": [ + "pip install PyMuPDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66b371fb-f4ea-4ced-8ad2-4229892e0647", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display\n", + "from openai import OpenAI\n", + "import fitz # PyMuPDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41068273-4325-4de2-b11d-37d2831b1a47", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba003970-0cc9-4e11-8702-0b120f378fa4", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "faa89067-fcee-4950-b4ce-3faec640c79b", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"You are a spell, grammar, and vocabulary checker. You check for any mistakes in terms of spelling, grammar, and vocabulary of texts or files that are given to you. You provide a response with the percentage of the text that is correct in terms of spelling, vocab, and grammar but also the total number of words. These characters is in the file or text that you are checking, and provide instructions in bullet points on how to fix them and where the mistakes are.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de32a94d-9c1b-4e1a-a1b9-78d3180c0d79", + "metadata": {}, + "outputs": [], + "source": [ + "# user_prompt = \"Hi, mw namw is kkkdvin. 
How are y,?\" # Uncomment this to test the implementation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "272f379d-3471-488d-ba27-bbffff961d72", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_pdf_text_to_string(pdf_path):\n", + " \"\"\"\n", + " Extracts all text from a PDF file and returns it as a single string.\n", + "\n", + " Args:\n", + " pdf_path (str): The path to the PDF file.\n", + "\n", + " Returns:\n", + " str: A string containing all the extracted text from the PDF.\n", + " \"\"\"\n", + " text_content = \"\"\n", + " try:\n", + " doc = fitz.open(pdf_path)\n", + " for page_num in range(doc.page_count):\n", + " page = doc.load_page(page_num)\n", + " text_content += page.get_text()\n", + " doc.close()\n", + " except Exception as e:\n", + " print(f\"Error processing PDF: {e}\")\n", + " return None\n", + " return text_content\n", + "\n", + "pdf_file_path = \"gram-vocab-test.pdf\" # Replace with the actual path to your PDF\n", + "user_prompt = extract_pdf_text_to_string(pdf_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07a839f6-c508-4b94-98ec-877c19023e58", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": f\"This is the text to check for grammar, vocab, and spelling errors: {user_prompt}\"}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a642cb62-9016-4957-a74e-9f97f8c495a7", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ce6b006-19b6-48b4-b344-b4b57b8c1438", + "metadata": {}, + "outputs": [], + "source": [ + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54bc23cd-f59c-4b4d-bc3e-60f273692d92", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From cb4788a7254c2910a52b55fc492b9381a7de0305 Mon Sep 17 00:00:00 2001 From: habibmir808 Date: Fri, 11 Jul 2025 02:20:18 +0600 Subject: [PATCH 20/25] comment code for better understanding --- .../code_commentor.ipynb | 335 ++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 week4/community-contributions/code_commentor.ipynb diff --git a/week4/community-contributions/code_commentor.ipynb b/week4/community-contributions/code_commentor.ipynb new file mode 100644 index 0000000..3bf10a5 --- /dev/null +++ b/week4/community-contributions/code_commentor.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "07bb451d-2b91-425f-b8ea-6f35ced780b0", + "metadata": {}, + "source": [ + "# AI Code Commenting Assistant \n", + "\n", + "## Project Summary \n", + "\n", + "**Purpose**: \n", + "An AI-powered assistant that automatically generates **clear, concise code comments** to improve code readability and maintainability. 
\n", + "\n", + "**Key Features**: \n", + "- **Language-Agnostic**: Auto-detects programming languages or allows manual specification \n", + "- **Smart Commenting**: Focuses on explaining **complex logic, algorithms, and edge cases** (not obvious syntax) \n", + "- **Customizable**: Optional focus areas let users prioritize specific parts (e.g., database queries, recursion) \n", + "- **Efficient Workflow**: Processes code in chunks and preserves original formatting \n", + "\n", + "**Benefits**: \n", + "✔ Saves time writing documentation \n", + "✔ Helps developers understand unfamiliar code \n", + "✔ Supports multiple languages (Python, JavaScript, C++, SQL, etc.) \n", + "✔ Avoids redundant comments on trivial operations \n", + "\n", + "**Example Use Case**: \n", + "```python \n", + "# Before AI: \n", + "def fib(n): \n", + " if n <= 1: return n \n", + " else: return fib(n-1) + fib(n-2) \n", + "\n", + "# After AI: \n", + "def fib(n): \n", + " # Recursively computes nth Fibonacci number (O(2^n) time) \n", + " if n <= 1: return n # Base case \n", + " else: return fib(n-1) + fib(n-2) # Recursive case " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0413ae1-0348-4884-ba95-384c4c8f841c", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade huggingface_hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b22da766-042b-402f-9e05-78aa8f45ddd4", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import io\n", + "from dotenv import load_dotenv\n", + "from google import genai\n", + "from google.genai import types\n", + "from openai import OpenAI\n", + "from anthropic import Anthropic\n", + "from huggingface_hub import InferenceClient\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5af6d3de-bab6-475e-b2f3-7b788bb2e529", + "metadata": {}, + "outputs": [], + "source": [ + "# load environments\n", + "load_dotenv(override=True)\n", + "os.environ['ANTHROPIC_API_KEY'] = os.getenv(\"CLAUDE_API_KEY\")\n", + "os.environ[\"HF_TOKEN\"] = os.getenv(\"HF_TOKEN\")\n", + "gemini_api_key= os.getenv(\"GEMINI_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cad0755e-4174-4fbc-84e6-15cc54bc609a", + "metadata": {}, + "outputs": [], + "source": [ + "#initialize remote models\n", + "claude= Anthropic()\n", + "gemini = genai.Client(api_key=gemini_api_key)\n", + "\n", + "#opensource models\n", + "qwen = InferenceClient(provider=\"featherless-ai\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31d75812-1cd3-4512-8446-022c3357c354", + "metadata": {}, + "outputs": [], + "source": [ + "#initialize local model\n", + "llama = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31316379-2a56-4707-b207-ea60b490f536", + "metadata": {}, + "outputs": [], + "source": [ + "#models\n", + "claude_model = \"claude-3-5-haiku-latest\"\n", + "gemini_model = \"gemini-2.5-pro\"\n", + "qwen_model= \"Qwen/Qwen2.5-Coder-32B-Instruct\"\n", + "llama_model = \"llama3:8b\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7d9c4bf-0955-4406-8717-ffa7bdd0bec9", + "metadata": {}, + "outputs": [], + "source": [ + "system_message=\"\"\"\n", + "You are an expert AI specialized in code documentation. Your task is to generate concise, meaningful comments that explain the purpose and logic of provided code. Follow these rules:\n", + "\n", + "1. 
**Infer language**: Auto-detect programming language and use appropriate comment syntax\n", + "2. **Explain why, not what**: Focus on purpose, edge cases, and non-obvious logic\n", + "3. **Be concise**: Maximum 1-2 sentences per comment block\n", + "4. **Prioritize key sections**: Only comment complex logic, algorithms, or critical operations\n", + "5. **Maintain structure**: Preserve original code formatting and indentation\n", + "6. **Output format**: Return ONLY commented code with no additional text\n", + "\n", + "Commenting guidelines by language:\n", + "- Python: `# Inline comments` and `\"\"Docstrings\"\"`\n", + "- JavaScript/Java: `// Line comments` and `/* Block comments */`\n", + "- C/C++: `//` and `/* */`\n", + "- SQL: `-- Line comments`\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79dfe110-1523-40c7-ad90-2787ed22fd8d", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt(code):\n", + " prompt = f\"\"\"\n", + " i want to document my code for better understanding. Please generate meaningful necessary comments\n", + " here is my code:\n", + " {code}\n", + "\n", + " Return ONLY commented code with no additional text\n", + " \"\"\"\n", + "\n", + " return prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7bcf29e-ec78-4cfd-9b41-f2dc86400435", + "metadata": {}, + "outputs": [], + "source": [ + "def conversation_template(code):\n", + " messages = [\n", + " {\"role\":\"system\", \"content\":system_message},\n", + " {\"role\":\"user\",\"content\":user_prompt(code)}\n", + " ]\n", + " return messages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a36fec0f-7eba-4ccd-8fc4-cbf5ade76fa2", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(code):\n", + " message = user_prompt(code)\n", + " response = gemini.models.generate_content_stream(\n", + " model=gemini_model,\n", + " config= types.GenerateContentConfig(\n", + " system_instruction = system_message,\n", + " temperature = 0.8,\n", + " ),\n", + " contents = [message]\n", + " )\n", + "\n", + " result = \"\"\n", + " for chunk in response:\n", + " result += chunk.text or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5d1e0c0-dc88-43ee-8698-82ad9ce7c51b", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(code):\n", + " messages = [{\"role\":\"user\",\"content\":user_prompt(code)}]\n", + " response = claude.messages.stream(\n", + " model= claude_model,\n", + " temperature=0.8,\n", + " messages = messages,\n", + " max_tokens=5000\n", + " )\n", + "\n", + " result = \"\"\n", + " with response as stream:\n", + " for text in stream.text_stream:\n", + " result += text or \"\"\n", + " yield result\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "903c97e5-9170-449e-8a0f-9f906351ec45", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_opensource(code,model):\n", + " model = model.lower()\n", + " client = globals()[model]\n", + " model = globals()[f\"{model}_model\"]\n", + " stream = client.chat.completions.create(\n", + " model = model,\n", + " messages= conversation_template(code),\n", + " temperature = 0.7,\n", + " stream = True\n", + " )\n", + "\n", + " result = \"\"\n", + " for chunk in stream:\n", + " result += chunk.choices[0].delta.content or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff051c22-a2f8-4153-b970-f8a466a4cf5a", + "metadata": {}, + 
"outputs": [], + "source": [ + "def commentor(code, model):\n", + " model =model.lower()\n", + " if model == \"claude\":\n", + " result = stream_claude(code)\n", + " elif model == \"gemini\":\n", + " result = stream_gemini(code)\n", + " elif model == \"qwen\" or model == \"llama\":\n", + " result = stream_opensource(code, model)\n", + "\n", + "\n", + " for code in result:\n", + " yield code.replace(\"```cpp\\n\",\"\").replace(\"```python\\n\",\"\").replace(\"```javascript\\n\",\"\").replace(\"```typescript\\n\",\"\").replace(\"```\",\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10daf070-3546-4073-a2a0-3f5f8fc156f0", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " gr.Markdown(\"# Genarate comment\")\n", + " with gr.Row():\n", + " raw_code = gr.Textbox(label=\"Raw Code:\", lines=10)\n", + " commented_code = gr.Textbox(label=\"Commented_code\",lines=10)\n", + " with gr.Row():\n", + " models = gr.Dropdown([\"Gemini\",\"Claude\",\"Llama\",\"Qwen\"], value=\"Gemini\")\n", + " with gr.Row():\n", + " generate_comment = gr.Button(\"Generate Comment\")\n", + "\n", + " generate_comment.click(commentor, inputs=[raw_code, models], outputs=[commented_code])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afb87f32-f25e-40c5-844a-d2b7af748192", + "metadata": {}, + "outputs": [], + "source": [ + "ui.launch(inbrowser=True,debug=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96bc48ad-10ad-4821-b58e-ea1b22cdcdc9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1bc122939545a403a2ad1bb71e1e4045fb4d72c5 Mon Sep 17 00:00:00 2001 From: Zhufeng-Qiu Date: Thu, 10 Jul 2025 16:47:53 -0700 Subject: [PATCH 21/25] Add the community contribution for Week3/4/5 --- .../Week3_Exercise_Data_Generator.ipynb | 551 ++++++++++++ ..._Meeting_Minutes_product_with_Gradio.ipynb | 523 +++++++++++ ...tween_thirteen_lang_coment_unit_test.ipynb | 841 ++++++++++++++++++ 3 files changed, 1915 insertions(+) create mode 100644 week3/community-contributions/Week3_Exercise_Data_Generator.ipynb create mode 100644 week3/community-contributions/Week_3_Day_5_Meeting_Minutes_product_with_Gradio.ipynb create mode 100644 week4/community-contributions/Week4_Exercise_convert_between_thirteen_lang_coment_unit_test.ipynb diff --git a/week3/community-contributions/Week3_Exercise_Data_Generator.ipynb b/week3/community-contributions/Week3_Exercise_Data_Generator.ipynb new file mode 100644 index 0000000..583010c --- /dev/null +++ b/week3/community-contributions/Week3_Exercise_Data_Generator.ipynb @@ -0,0 +1,551 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GD5Omr5EfWgb" + }, + "source": [ + "# Date Generator\n", + "\n", + "generate synthetic data when given scheme, business problem description, model, number of records, file name, file type, and environment\n", + "\n", + "# Available models\n", + " Model API:\n", + "\n", + " 1. gpt-4o-mini\n", + " 2. claude-3-haiku-20240307\n", + " 3. gemini-2.0-flash\n", + " 4. 
deepseek-chat\"\n", + "\n", + " HuggingFace API:\n", + "\n", + " 5. meta-llama/Meta-Llama-3.1-8B-Instruct\n", + "\n", + "\n", + "# Available environment\n", + "\n", + "Colab: set up HF token and API keys in Colab secret section\n", + "\n", + "Local: set up HF token and API keys in .env file\n", + "\n", + "\n", + "\n", + "### *** This project is developed based on the idea of 'week3/community-contributuins/Week3-Dataset_Generator-DP'. Really appreciate it! Then, the project is improved to run both on Colab or locally, and integrate HuggingFace API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4FiCnE0MmU56" + }, + "outputs": [], + "source": [ + "!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n", + "!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0\n", + "!pip install anthropic dotenv pyarrow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JeyKw5guoH3r" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "from huggingface_hub import login\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n", + "from bs4 import BeautifulSoup\n", + "from typing import List\n", + "import google.generativeai\n", + "import anthropic\n", + "from itertools import chain\n", + "from dotenv import load_dotenv\n", + "import gradio as gr\n", + "import json\n", + "import pandas as pd\n", + "import random\n", + "import re\n", + "import subprocess\n", + "import pyarrow as pa\n", + "import torch\n", + "import gc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7UyjFdRZoIAS" + }, + "outputs": [], + "source": [ + "# --- Schema Definition ---\n", + "SCHEMA = [\n", + " (\"Name\", \"TEXT\", '\"Northern Cafe\"'),\n", + " (\"Location\", \"TEXT\", '\"2904 S Figueroa St, Los Angeles, CA 90007\"'),\n", + " (\"Type\", \"TEXT\", 'One of [\"Chinese\",\"Mexico\",\"French\",\"Korean\",\"Italy\"] or other potential types'),\n", + " (\"Average Price\", \"TEXT\", '\"$30\", or \"--\" if unkown'),\n", + " (\"History/Age\", \"INT\", 'integer age of resturant, e.g., 7'),\n", + " (\"Menu\", \"Array\", '[\"Beef Noodle\", \"Fried Rice\", \"Dumpling\", ...]'),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jXcTQATLoICV" + }, + "outputs": [], + "source": [ + "# Default schema text for the textbox\n", + "DEFAULT_SCHEMA_TEXT = \"\\n\".join([f\"{i+1}. 
{col[0]} ({col[1]}) Example: {col[2]}\" for i, col in enumerate(SCHEMA)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4Irf5JV3oIEe" + }, + "outputs": [], + "source": [ + "# Available models\n", + "MODELS = [\n", + " \"gpt-4o-mini\",\n", + " \"claude-3-haiku-20240307\",\n", + " \"gemini-2.0-flash\",\n", + " \"deepseek-chat\",\n", + " \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JJ6r2SH9oIGf" + }, + "outputs": [], + "source": [ + "# Available file formats\n", + "FILE_FORMATS = [\".csv\", \".tsv\", \".jsonl\", \".parquet\", \".arrow\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B98j45E3vq5g" + }, + "outputs": [], + "source": [ + "system_prompt = \"\"\"You are a helpful assistant whose main purpose is to generate datasets for a given business problem based on given schema.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lsX16cWfwf6x" + }, + "outputs": [], + "source": [ + "def get_env_info(env):\n", + " try:\n", + " global hf_token, openai_api_key, anthropic_api_key, google_api_key, deepseek_api_key\n", + " if env == \"Colab\":\n", + " # Colab environment\n", + " from google.colab import drive\n", + " from google.colab import userdata\n", + " hf_token = userdata.get('HF_TOKEN')\n", + " openai_api_key = userdata.get('OPENAI_API_KEY')\n", + " anthropic_api_key = userdata.get('ANTHROPIC_API_KEY')\n", + " google_api_key = userdata.get('GOOGLE_API_KEY')\n", + " deepseek_api_key = userdata.get('DEEPSEEK_API_KEY')\n", + " elif env == \"Local\":\n", + " # Local environment\n", + " load_dotenv(override=True)\n", + " hf_token = os.getenv('HF_TOKEN')\n", + " openai_api_key = os.getenv('OPENAI_API_KEY')\n", + " anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + " google_api_key = os.getenv('GOOGLE_API_KEY')\n", + " deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n", + " except Exception as e:\n", + " raise Exception(f\"Please check your environment: {str(e)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2gLUFAwGv29Q" + }, + "outputs": [], + "source": [ + "def get_prompt(schema_text, business_problem, nr_records):\n", + " prompt = f\"\"\"\n", + " The problem is: {business_problem}\n", + "\n", + " Generate {nr_records} rows data in JSONL format, each line a JSON object with the following fields:\n", + "\n", + " {schema_text}\n", + "\n", + " Do NOT repeat column values from one row to another.\n", + "\n", + " Only output valid JSONL.\n", + " \"\"\"\n", + " return prompt.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YZe1FVH8wf84" + }, + "outputs": [], + "source": [ + "# --- LLM Interface ---\n", + "def query(user_prompt, model):\n", + " try:\n", + " if \"gpt\" in model.lower():\n", + " client = OpenAI(api_key=openai_api_key)\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " elif \"claude\" in model.lower():\n", + " client = anthropic.Anthropic(api_key=anthropic_api_key)\n", + " response = client.messages.create(\n", + " model=model,\n", + " messages=[{\"role\": \"user\", \"content\": 
user_prompt}],\n", + " max_tokens=4000,\n", + " temperature=0.7,\n", + " system=system_prompt\n", + " )\n", + " content = response.content[0].text\n", + " elif \"gemini\" in model.lower():\n", + " client = OpenAI(\n", + " api_key=google_api_key,\n", + " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n", + " )\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " elif \"deepseek\" in model.lower():\n", + " client = OpenAI(\n", + " api_key=deepseek_api_key,\n", + " base_url=\"https://api.deepseek.com\"\n", + " )\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " elif \"llama\" in model.lower():\n", + " global tokenizer, inputs, llama_model, outputs\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + "\n", + " login(hf_token, add_to_git_credential=True)\n", + " quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + " )\n", + "\n", + " tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + " if llama_model == None:\n", + " llama_model = AutoModelForCausalLM.from_pretrained(model, device_map=\"auto\", quantization_config=quant_config)\n", + " outputs = llama_model.generate(inputs, max_new_tokens=4000)\n", + "\n", + " _, _, after = tokenizer.decode(outputs[0]).partition(\"assistant<|end_header_id|>\")\n", + " content = after.strip()\n", + " else:\n", + " raise ValueError(f\"Unsupported model. 
Use one of {MODELS}\")\n", + "\n", + " # Parse JSONL output\n", + " lines = [line.strip() for line in content.strip().splitlines() if line.strip().startswith(\"{\")]\n", + " return [json.loads(line) for line in lines]\n", + "\n", + " except Exception as e:\n", + " raise Exception(f\"Model query failed: {str(e)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4WUj-XqM5IYT" + }, + "outputs": [], + "source": [ + "# --- Output Formatter ---\n", + "def save_dataset(records, file_format, filename):\n", + " df = pd.DataFrame(records)\n", + " if file_format == \".csv\":\n", + " df.to_csv(filename, index=False)\n", + " elif file_format == \".tsv\":\n", + " df.to_csv(filename, sep=\"\\t\", index=False)\n", + " elif file_format == \".jsonl\":\n", + " with open(filename, \"w\") as f:\n", + " for record in records:\n", + " f.write(json.dumps(record) + \"\\n\")\n", + " elif file_format == \".parquet\":\n", + " df.to_parquet(filename, engine=\"pyarrow\", index=False)\n", + " elif file_format == \".arrow\":\n", + " table = pa.Table.from_pandas(df)\n", + " with pa.OSFile(filename, \"wb\") as sink:\n", + " with pa.ipc.new_file(sink, table.schema) as writer:\n", + " writer.write(table)\n", + " else:\n", + " raise ValueError(\"Unsupported file format\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WenbNqrpwf-_" + }, + "outputs": [], + "source": [ + "# --- Main Generation Function ---\n", + "def generate_dataset(schema_text, business_problem, model, nr_records, file_format, save_as, env):\n", + " try:\n", + " # Validation\n", + " if nr_records <= 10:\n", + " return \"❌ Error: Number of records must be greater than 10.\", None\n", + " if nr_records > 1000:\n", + " return \"❌ Error: Number of records must be less than or equal to 1000.\", None\n", + "\n", + " if file_format not in FILE_FORMATS:\n", + " return \"❌ Error: Invalid file format.\", None\n", + "\n", + " if not (save_as or save_as.strip() == \"\"):\n", + " save_as = f\"default{file_format}\"\n", + " elif not save_as.endswith(file_format):\n", + " save_as = save_as + file_format\n", + "\n", + " # Load env\n", + " get_env_info(env)\n", + "\n", + " # Generate prompt\n", + " user_prompt = get_prompt(schema_text, business_problem, nr_records)\n", + "\n", + " # Query model\n", + " records = query(user_prompt, model)\n", + "\n", + " if not records:\n", + " return \"❌ Error: No valid records generated from the model.\", None\n", + "\n", + " # Save dataset\n", + " save_dataset(records, file_format, save_as)\n", + "\n", + " # Create preview\n", + " df = pd.DataFrame(records)\n", + " preview = df.head(10) # Show first 10 rows\n", + "\n", + " success_message = f\"✅ Generated {len(records)} records successfully!\\n📁 Saved to: {save_as}\\n📊 \"\n", + "\n", + " return success_message, preview\n", + "\n", + " except Exception as e:\n", + " return f\"❌ Error: {str(e)}\", None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pHiP8ky8wgEb" + }, + "outputs": [], + "source": [ + "# --- Gradio Interface ---\n", + "\n", + "with gr.Blocks(title=\"Dataset Generator\", theme=gr.themes.Citrus()) as interface:\n", + " hf_token = None\n", + " openai_api_key = None\n", + " anthropic_api_key = None\n", + " google_api_key = None\n", + " deepseek_api_key = None\n", + " tokenizer = None\n", + " inputs = None\n", + " llama_model = None\n", + " outputs = None\n", + "\n", + " gr.Markdown(\"# Dataset Generator\")\n", + " gr.Markdown(\"Generate synthetic datasets 
using AI models\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=2):\n", + " schema_input = gr.Textbox(\n", + " label=\"Schema\",\n", + " value=DEFAULT_SCHEMA_TEXT,\n", + " lines=15,\n", + " placeholder=\"Define your dataset schema here... Please follow this format: Field_Name, Field_Type, Field Example\"\n", + " )\n", + "\n", + " business_problem_input = gr.Textbox(\n", + " label=\"Business Problem\",\n", + " value=\"I want to generate restuant records\",\n", + " lines=1,\n", + " placeholder=\"Enter business problem desciption for the model...\"\n", + " )\n", + "\n", + " with gr.Row():\n", + " model_dropdown = gr.Dropdown(\n", + " label=\"Model\",\n", + " choices=MODELS,\n", + " value=MODELS[0],\n", + " interactive=True\n", + " )\n", + "\n", + " nr_records_input = gr.Number(\n", + " label=\"Number of records\",\n", + " value=27,\n", + " minimum=11,\n", + " maximum=1000,\n", + " step=1\n", + " )\n", + "\n", + " with gr.Row():\n", + " save_as_input = gr.Textbox(\n", + " label=\"Save as\",\n", + " value=\"restaurant_dataset\",\n", + " placeholder=\"Enter filename (extension will be added automatically)\"\n", + " )\n", + "\n", + " file_format_dropdown = gr.Dropdown(\n", + " label=\"File format\",\n", + " choices=FILE_FORMATS,\n", + " value=FILE_FORMATS[0],\n", + " interactive=True\n", + " )\n", + "\n", + " env_dropdown = gr.Dropdown(\n", + " label=\"Environment\",\n", + " choices=[\"Colab\", \"Local\"],\n", + " value=\"Colab\",\n", + " interactive=True\n", + " )\n", + "\n", + "\n", + "\n", + " generate_btn = gr.Button(\"🚀 Generate\", variant=\"secondary\", size=\"lg\")\n", + "\n", + " with gr.Column(scale=1):\n", + " output_status = gr.Textbox(\n", + " label=\"Status\",\n", + " lines=4,\n", + " interactive=False\n", + " )\n", + "\n", + " output_preview = gr.Dataframe(\n", + " label=\"Preview (First 10 rows)\",\n", + " interactive=False,\n", + " wrap=True\n", + " )\n", + "\n", + " # Connect the generate button\n", + " generate_btn.click(\n", + " fn=generate_dataset,\n", + " inputs=[\n", + " schema_input,\n", + " business_problem_input,\n", + " model_dropdown,\n", + " nr_records_input,\n", + " file_format_dropdown,\n", + " save_as_input,\n", + " env_dropdown\n", + " ],\n", + " outputs=[output_status, output_preview]\n", + " )\n", + "\n", + " gr.Markdown(\"\"\"\n", + " ### 📝 Instructions:\n", + " 1. **Schema**: Define the structure of your dataset (pre-filled with restaurant schema)\n", + " 2. **Business problem**: User prompt to guide the AI model\n", + " 3. **Model**: Choose between GPT, Claude, Gemini, DeepSeek or Llama models\n", + " 4. **Number of records**: Number of records to generate (minimum 11)\n", + " 5. **File format**: Choose output format (.csv, .tsv, .jsonl, .parquet, .arrow)\n", + " 6. **Save as**: Filename (extension added automatically)\n", + " 7. 
Click **Generate** to create your dataset\n", + "\n", + " ### 🔧 Requirements:\n", + " - For local mode, set up HF token and API keys in `.env` file (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `DEEPSEEK_API_KEY`, `HF_TOKEN`)\n", + " - For colab mode, set up HF token and API keys in Colab secret section (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `DEEPSEEK_API_KEY`, `HF_TOKEN`)\n", + " \"\"\")\n", + "\n", + "interface.launch(debug=True)\n", + "\n", + "del tokenizer, inputs, llama_model, outputs\n", + "gc.collect()\n", + "torch.cuda.empty_cache()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week3/community-contributions/Week_3_Day_5_Meeting_Minutes_product_with_Gradio.ipynb b/week3/community-contributions/Week_3_Day_5_Meeting_Minutes_product_with_Gradio.ipynb new file mode 100644 index 0000000..3428e62 --- /dev/null +++ b/week3/community-contributions/Week_3_Day_5_Meeting_Minutes_product_with_Gradio.ipynb @@ -0,0 +1,523 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "It89APiAtTUF" + }, + "source": [ + "# Create meeting minutes from an Audio file\n", + "\n", + "I downloaded some Denver City Council meeting minutes and selected a portion of the meeting for us to transcribe. You can download it here: \n", + "https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n", + "\n", + "If you'd rather work with the original data, the HuggingFace dataset is [here](https://huggingface.co/datasets/huuuyeah/meetingbank) and the audio can be downloaded [here](https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio/tree/main).\n", + "\n", + "The goal of this product is to use the Audio to generate meeting minutes, including actions.\n", + "\n", + "For this project, you can either use the Denver meeting minutes, or you can record something of your own!\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sJPSCwPX3MOV" + }, + "source": [ + "## Again - please note: 2 important pro-tips for using Colab:\n", + "\n", + "**Pro-tip 1:**\n", + "\n", + "The top of every colab has some pip installs. You may receive errors from pip when you run this, such as:\n", + "\n", + "> gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2025.3.0 which is incompatible.\n", + "\n", + "These pip compatibility errors can be safely ignored; and while it's tempting to try to fix them by changing version numbers, that will actually introduce real problems!\n", + "\n", + "**Pro-tip 2:**\n", + "\n", + "In the middle of running a Colab, you might get an error like this:\n", + "\n", + "> Runtime error: CUDA is required but not available for bitsandbytes. Please consider installing [...]\n", + "\n", + "This is a super-misleading error message! Please don't try changing versions of packages...\n", + "\n", + "This actually happens because Google has switched out your Colab runtime, perhaps because Google Colab was too busy. The solution is:\n", + "\n", + "1. Kernel menu >> Disconnect and delete runtime\n", + "2. 
Reload the colab from fresh and Edit menu >> Clear All Outputs\n", + "3. Connect to a new T4 using the button at the top right\n", + "4. Select \"View resources\" from the menu on the top right to confirm you have a GPU\n", + "5. Rerun the cells in the colab, from the top down, starting with the pip installs\n", + "\n", + "And all should work great - otherwise, ask me!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f2vvgnFpHpID" + }, + "outputs": [], + "source": [ + "!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n", + "!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FW8nl3XRFrz0" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "from google.colab import drive\n", + "from huggingface_hub import login\n", + "from google.colab import userdata\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q3D1_T0uG_Qh" + }, + "outputs": [], + "source": [ + "# Constants\n", + "\n", + "AUDIO_MODEL = \"whisper-1\"\n", + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Es9GkQ0FGCMt" + }, + "outputs": [], + "source": [ + "# New capability - connect this Colab to my Google Drive\n", + "# See immediately below this for instructions to obtain denver_extract.mp3\n", + "\n", + "drive.mount(\"/content/drive\")\n", + "audio_filename = \"/content/drive/MyDrive/llms/denver_extract.mp3\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HTl3mcjyzIEE" + }, + "source": [ + "# Download denver_extract.mp3\n", + "\n", + "You can either use the same file as me, the extract from Denver city council minutes, or you can try your own..\n", + "\n", + "If you want to use the same as me, then please download my extract here, and put this on your Google Drive: \n", + "https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xYW8kQYtF-3L" + }, + "outputs": [], + "source": [ + "# Sign in to HuggingFace Hub\n", + "\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qP6OB2OeGC2C" + }, + "outputs": [], + "source": [ + "# Sign in to OpenAI using Secrets in Colab\n", + "\n", + "openai_api_key = userdata.get('OPENAI_API_KEY')\n", + "openai = OpenAI(api_key=openai_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GMShdVGlGGr4" + }, + "outputs": [], + "source": [ + "# Use the Whisper OpenAI model to convert the Audio to Text\n", + "# If you'd prefer to use an Open Source model, class student Youssef has contributed an open source version\n", + "# which I've added to the bottom of this colab\n", + "\n", + "audio_file = open(audio_filename, \"rb\")\n", + "transcription = openai.audio.transcriptions.create(model=AUDIO_MODEL, file=audio_file, response_format=\"text\")\n", 
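+    "# With response_format=\"text\", the API returns the transcript as a plain string rather than an object, so it can be printed directly\n",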
+ "print(transcription)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "piEMmcSfMH-O" + }, + "outputs": [], + "source": [ + "system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n", + "user_prompt = f\"Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n", + "\n", + "messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcRKUgcxMew6" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6CujZRAgMimy" + }, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "# inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + "streamer = TextStreamer(tokenizer)\n", + "model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config, trust_remote_code=True)\n", + "# outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MaLNmJ5PSqcH" + }, + "outputs": [], + "source": [ + "inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + "outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "102tdU_3Peam" + }, + "outputs": [], + "source": [ + "response = tokenizer.decode(outputs[0])\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KlomN6CwMdoN" + }, + "outputs": [], + "source": [ + "display(Markdown(response))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0jZElVOMSPAr" + }, + "source": [ + "Day5 exercise - Gradio UI for meeting minutes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5iiYYxQMHf0i" + }, + "outputs": [], + "source": [ + "import gradio as gr\n", + "import tempfile\n", + "import soundfile as sf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aGwXW7BjPcTM" + }, + "outputs": [], + "source": [ + "# !pip install pydub\n", + "# !apt-get install ffmpeg\n", + "\n", + "from pydub import AudioSegment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RNu-reHuCYj_" + }, + "outputs": [], + "source": [ + "# Make sure that the tokenizeer and model is already generated\n", + "\n", + "# tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + "# tokenizer.pad_token = tokenizer.eos_token\n", + "# streamer = TextStreamer(tokenizer)\n", + "# model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": 
"KOuoH0YOPruE" + }, + "outputs": [], + "source": [ + "# def save_as_mp3(audio_np):\n", + "# sr, data = audio_np\n", + "# # Convert float32 or int16 to PCM wav and then mp3\n", + "# wav_path = tempfile.NamedTemporaryFile(suffix=\".wav\", delete=False).name\n", + "# mp3_path = tempfile.NamedTemporaryFile(suffix=\".mp3\", delete=False).name\n", + "\n", + "# sf.write(wav_path, data, sr)\n", + "# audio_segment = AudioSegment.from_wav(wav_path)\n", + "# audio_segment.export(mp3_path, format=\"mp3\", bitrate=\"64k\") # Low bitrate = small file\n", + "# return mp3_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "toBIPBJoSNw0" + }, + "outputs": [], + "source": [ + "# Handles audio input as numpy array and returns updated chat history\n", + "def speak_send(audio_np):\n", + "\n", + " # If use numpy as input: audio_input = gr.Audio(sources=\"upload\", type=\"numpy\", label=\"Upload audio file to generate meeting minutes\")\n", + " # mp3_path = save_as_mp3(audio_np)\n", + "\n", + " # with open(mp3_path, \"rb\") as audio_file:\n", + " # transcription = openai.audio.transcriptions.create(\n", + " # model=AUDIO_MODEL,\n", + " # file=audio_file,\n", + " # response_format=\"text\"\n", + " # )\n", + "\n", + " audio = AudioSegment.from_file(audio_np)\n", + " with tempfile.NamedTemporaryFile(suffix=\".mp3\", delete=False) as tmpfile:\n", + " audio.export(tmpfile.name, format=\"mp3\")\n", + " with open(tmpfile.name, \"rb\") as file:\n", + " transcript = openai.audio.transcriptions.create(\n", + " model=AUDIO_MODEL,\n", + " file=file,\n", + " response_format=\"text\"\n", + " )\n", + "\n", + " system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n", + " user_prompt = f\"Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + "\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + " outputs = model.generate(inputs, max_new_tokens=2000)\n", + "\n", + " _, _, after = tokenizer.decode(outputs[0]).partition(\"assistant<|end_header_id|>\")\n", + " return after.strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xXJfabpDSN5R" + }, + "outputs": [], + "source": [ + "with gr.Blocks() as demo:\n", + "\n", + " with gr.Row():\n", + " audio_input = gr.Audio(sources=\"upload\", type=\"filepath\", label=\"Upload audio file to generate meeting minutes\")\n", + " with gr.Row():\n", + " audio_submit = gr.Button(\"Send\")\n", + " with gr.Row():\n", + " outputs = [gr.Markdown(label=\"Meeting minutes:\")]\n", + "\n", + " audio_submit.click(speak_send, inputs=audio_input, outputs=outputs)\n", + "\n", + "demo.launch(debug=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kuxYecT2QDQ9" + }, + "source": [ + "# Student contribution\n", + "\n", + "Student Emad S. has made this powerful variation that uses `TextIteratorStreamer` to stream back results into a Gradio UI, and takes advantage of background threads for performance! I'm sharing it here if you'd like to take a look at some very interesting work. 
Thank you, Emad!\n", + "\n", + "https://colab.research.google.com/drive/1Ja5zyniyJo5y8s1LKeCTSkB2xyDPOt6D" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AU3uAEyU3a-o" + }, + "source": [ + "## Alternative implementation\n", + "\n", + "Class student Youssef has contributed this variation in which we use an open-source model to transcribe the meeting Audio.\n", + "\n", + "Thank you Youssef!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "phYYgAbBRvu5" + }, + "outputs": [], + "source": [ + "import torch\n", + "from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HdQnWEzW3lzP" + }, + "outputs": [], + "source": [ + "AUDIO_MODEL = \"openai/whisper-medium\"\n", + "speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(AUDIO_MODEL, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True)\n", + "speech_model.to('cuda')\n", + "processor = AutoProcessor.from_pretrained(AUDIO_MODEL)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZhA_fbeCSAeZ" + }, + "outputs": [], + "source": [ + "pipe = pipeline(\n", + " \"automatic-speech-recognition\",\n", + " model=speech_model,\n", + " tokenizer=processor.tokenizer,\n", + " feature_extractor=processor.feature_extractor,\n", + " torch_dtype=torch.float16,\n", + " device='cuda',\n", + " return_timestamps=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nrQjKtD53omJ" + }, + "outputs": [], + "source": [ + "# Use the Whisper OpenAI model to convert the Audio to Text\n", + "result = pipe(audio_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G_XSljOY3tDf" + }, + "outputs": [], + "source": [ + "transcription = result[\"text\"]\n", + "print(transcription)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week4/community-contributions/Week4_Exercise_convert_between_thirteen_lang_coment_unit_test.ipynb b/week4/community-contributions/Week4_Exercise_convert_between_thirteen_lang_coment_unit_test.ipynb new file mode 100644 index 0000000..a99930c --- /dev/null +++ b/week4/community-contributions/Week4_Exercise_convert_between_thirteen_lang_coment_unit_test.ipynb @@ -0,0 +1,841 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9", + "metadata": {}, + "source": [ + "# Power Coder\n", + "\n", + "1. Convert code between two programming language; supporting languages are Python, Java, JavaScript, TypeScript, C, C++, C#, Go, Rust, Kotlin, Swift, PHP, Julia\n", + "2. Automatically add docstring/comments based on selected comment style\n", + "3. Automatically generate unit tests based on selected unit test style\n", + "4. 
Supporting models: gpt-4o, claude-3-5-sonnet-20240620, gemini-2.5-flash" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import io\n", + "import sys\n", + "import json\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import google.generativeai\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n", + "import subprocess" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f672e1c-87e9-4865-b760-370fa605e614", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n", + "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n", + "os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY', 'your-key-if-not-using-env')\n", + "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize\n", + "\n", + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "gemini_via_openai_client = OpenAI(\n", + " api_key=os.environ['GOOGLE_API_KEY'], \n", + " base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n", + ")\n", + "OPENAI_MODEL = \"gpt-4o\"\n", + "CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n", + "GEMINI_MODEL = \"gemini-2.5-flash\"" + ] + }, + { + "cell_type": "markdown", + "id": "37b204dd-f770-41d9-9b19-7e1baa5273cd", + "metadata": {}, + "source": [ + "## 1. Conversion Part" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6896636f-923e-4a2c-9d6c-fac07828a201", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_system_prompt_for(in_lang, out_lang):\n", + " convert_system_message = f\"You are an assistant that reimplements {in_lang} code in high performance {out_lang}. \"\n", + " convert_system_message += f\"Respond only with {out_lang} code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n", + " convert_system_message += f\"The {out_lang} response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.\"\n", + " return convert_system_message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_user_prompt_for(in_lang, out_lang, input_instruct, in_code):\n", + " convert_user_prompt = f\"Rewrite this {in_lang} code in {out_lang} with the fastest possible implementation that produces identical output in the least time. \"\n", + " convert_user_prompt += f\"Respond only with {out_lang} code; do not explain your work other than a few comments. \"\n", + " convert_user_prompt += f\"Pay attention to number types to ensure no int overflows. 
Remember to include all necessary {out_lang} packages or modules, for example, iomanip for C++.\\n\\n\"\n", + " if input_instruct:\n", + " convert_user_prompt += \"Addtional instruction is: \" + input_instruct\n", + " convert_user_prompt += in_code\n", + " return convert_user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6190659-f54c-4951-bef4-4960f8e51cc4", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_messages_for(in_lang, out_lang, input_instruct, in_code):\n", + " return [\n", + " {\"role\": \"system\", \"content\": convert_system_prompt_for(in_lang, out_lang)},\n", + " {\"role\": \"user\", \"content\": convert_user_prompt_for(in_lang, out_lang, input_instruct, in_code)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0", + "metadata": {}, + "outputs": [], + "source": [ + "python_hard = \"\"\"# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + " \n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters\n", + "n = 10000 # Number of random numbers\n", + "initial_seed = 42 # Initial seed for the LCG\n", + "min_val = -10 # Minimum value of random numbers\n", + "max_val = 10 # Maximum value of random numbers\n", + "\n", + "# Timing the function\n", + "import time\n", + "start_time = time.time()\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "end_time = time.time()\n", + "\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_stream_gpt(in_lang, out_lang, input_instruct, in_code): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=convert_messages_for(in_lang, out_lang, input_instruct, in_code), temperature=0.0, stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8669f56b-8314-4582-a167-78842caea131", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_stream_claude(in_lang, out_lang, input_instruct, in_code):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " temperature=0.0,\n", + " system=convert_system_prompt_for(in_lang, out_lang),\n", + " messages=[{\"role\": \"user\", \"content\": convert_user_prompt_for(in_lang, out_lang, input_instruct, in_code)}],\n", + " )\n", + " 
reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01d3cd4f-c100-4e25-8670-0663513f6136", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_stream_gemini(in_lang, out_lang, input_instruct, in_code): \n", + " stream = gemini_via_openai_client.chat.completions.create(model=GEMINI_MODEL, messages=convert_messages_for(in_lang, out_lang, input_instruct, in_code), temperature=0.0, stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize(in_lang, out_lang, in_code, input_instruct, convert_model):\n", + " if \"gpt\" in convert_model.lower():\n", + " result = convert_stream_gpt(in_lang, out_lang, input_instruct, in_code)\n", + " elif \"claude\" in convert_model.lower():\n", + " result = convert_stream_claude(in_lang, out_lang, input_instruct, in_code)\n", + " elif \"gemini\" in convert_model.lower():\n", + " result = convert_stream_gemini(in_lang, out_lang, input_instruct, in_code)\n", + " else:\n", + " raise ValueError(\"Unknown convert model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "markdown", + "id": "07383878-f887-464f-8bc7-527c669d3edd", + "metadata": {}, + "source": [ + "## 2. Comment part" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d254038c-fdd6-4ef8-8b7a-a074f1e7405d", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_system_prompt_for(lang, comment_style):\n", + " comment_system_message = f\"You are an assistant that generate necessary, concise and clear comment/docstring for the {lang} code by applying {comment_style} comment style. \"\n", + " comment_system_message += f\"Respond only with added comments, and do not provide any redundant explanation. \"\n", + " return comment_system_message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e95cee4f-f229-4c9f-8e67-8a68cc9534c3", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_user_prompt_for(lang, code, comment_style):\n", + " comment_user_prompt = f\"Add the comments/docstring on the given code for the {lang} programming language in {comment_style} comment style. 
\"\n", + " comment_user_prompt += f\"Respond only with added comments, and do not provide any redundant explanation.\\n\\n\"\n", + " comment_user_prompt += f\"The given code is as follows: \"\n", + " comment_user_prompt += code\n", + " return comment_user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "507426c2-cf5a-4041-b904-b18a5afe83b6", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_messages_for(lang, code, comment_style):\n", + " return [\n", + " {\"role\": \"system\", \"content\": comment_system_prompt_for(lang, comment_style)},\n", + " {\"role\": \"user\", \"content\": comment_user_prompt_for(lang, code, comment_style)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e1c8cf6-7a15-4e79-82f6-6bb2a0b85773", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_stream_gpt(lang, code, comment_style): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=comment_messages_for(lang, code, comment_style), temperature=0.0, stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26f27781-4a3e-4e5f-a8ab-9a25944a9879", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_stream_claude(lang, code, comment_style):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " temperature=0.0,\n", + " system=comment_system_prompt_for(lang, comment_style),\n", + " messages=[{\"role\": \"user\", \"content\": comment_user_prompt_for(lang, code, comment_style)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e6719e7-f2f3-40ea-8fed-01d84a641306", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_stream_gemini(lang, code, comment_style): \n", + " stream = gemini_via_openai_client.chat.completions.create(model=GEMINI_MODEL, messages=comment_messages_for(lang, code, comment_style), temperature=0.0, stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b98acc4-23d8-4671-8f19-92d72631b55d", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_comments_via_model(lang, code, comment_style, comment_model):\n", + " if \"gpt\" in comment_model.lower():\n", + " result = comment_stream_gpt(lang, code, comment_style)\n", + " elif \"claude\" in comment_model.lower():\n", + " result = comment_stream_claude(lang, code, comment_style)\n", + " elif \"gemini\" in comment_model.lower():\n", + " result = comment_stream_gemini(lang, code, comment_style)\n", + " else:\n", + " raise ValueError(\"Unknown comment model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "282c75ae-d8c3-4866-a024-f7ecf87b3cde", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_comments_fn(comment_option, in_lang, out_lang, in_code, out_code, in_comment_style, out_comment_style, comment_model):\n", + " if 'input' in comment_option:\n", + " in_gen = generate_comments_via_model(in_lang, in_code, in_comment_style, 
comment_model)\n", + " for in_output in in_gen:\n", + " yield in_output, \"\"\n", + " elif 'output' in comment_option:\n", + " out_gen = generate_comments_via_model(out_lang, out_code, out_comment_style, comment_model)\n", + " for out_output in out_gen:\n", + " yield \"\", out_output\n", + " elif 'both' in comment_option:\n", + " in_gen = generate_comments_via_model(in_lang, in_code, in_comment_style, comment_model)\n", + " out_gen = generate_comments_via_model(out_lang, out_code, out_comment_style, comment_model)\n", + " for in_output, out_output in zip(in_gen, out_gen):\n", + " yield in_output, out_output" + ] + }, + { + "cell_type": "markdown", + "id": "ce2c178c-d03c-49c0-b0e9-c57c699bca08", + "metadata": {}, + "source": [ + "## 3. Unit test part" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5a4743e-e1a8-42c7-8f1f-a73d49c0895d", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_system_prompt_for(lang, unit_test_style):\n", + " unit_test_system_message = f\"You are an assistant that generate necessary, concise, clear and executable unit tests for the {lang} code by applying {unit_test_style} unit test style. \"\n", + " unit_test_system_message += f\"Respond only with generated unit tests; use comments sparingly and do not provide any explanation other than occasional comments. \"\n", + " return unit_test_system_message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334d5e40-71ff-4d24-8cef-b6c81c188e4d", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_user_prompt_for(lang, code, unit_test_style):\n", + " unit_test_user_prompt = f\"Add the unit tests on the given code for the {lang} programming language in {unit_test_style} unit test style. \"\n", + " unit_test_user_prompt += f\"Respond only with generated unit tests; use comments sparingly and do not provide any explanation other than occasional comments.\\n\\n\"\n", + " unit_test_user_prompt += f\"The given code is as follows: \"\n", + " unit_test_user_prompt += code\n", + " return unit_test_user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a8e061f-3993-4746-9425-d938d2537f65", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_messages_for(lang, code, unit_test_style):\n", + " return [\n", + " {\"role\": \"system\", \"content\": unit_test_system_prompt_for(lang, unit_test_style)},\n", + " {\"role\": \"user\", \"content\": unit_test_user_prompt_for(lang, code, unit_test_style)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71c1613b-7a16-4443-acec-d0a2d9bed192", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_stream_gpt(lang, code, unit_test_style): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=unit_test_messages_for(lang, code, unit_test_style), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a6e3502-f7ff-42b8-8fc5-2697b2d1f36e", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_stream_claude(lang, code, unit_test_style):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=unit_test_system_prompt_for(lang, unit_test_style),\n", + " messages=[{\"role\": \"user\", \"content\": unit_test_user_prompt_for(lang, code, unit_test_style)}],\n", + " )\n", + " reply 
= \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d7f694f-a276-4bdc-9cfb-755483fd4380", + "metadata": {}, + "outputs": [], + "source": [ + "def unit_test_stream_gemini(lang, code, unit_test_style): \n", + " stream = gemini_via_openai_client.chat.completions.create(model=GEMINI_MODEL, messages=unit_test_messages_for(lang, code, unit_test_style), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c824429a-b18a-4320-8258-0141037a6531", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_unit_test_via_model(lang, code, unit_test_style, unit_test_model):\n", + " if \"gpt\" in unit_test_model.lower():\n", + " result = unit_test_stream_gpt(lang, code, unit_test_style)\n", + " elif \"claude\" in unit_test_model.lower():\n", + " result = unit_test_stream_claude(lang, code, unit_test_style)\n", + " elif \"gemini\" in unit_test_model.lower():\n", + " result = unit_test_stream_gemini(lang, code, unit_test_style)\n", + " else:\n", + " raise ValueError(\"Unknown unit test model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3e59e26-37c0-4429-b69c-deb581423dd0", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_unit_test_fn(unit_test_option, in_lang, out_lang, in_code, out_code, in_unit_test_style, out_unit_test_style, unit_test_model):\n", + " if 'input' in unit_test_option:\n", + " in_gen = generate_unit_test_via_model(in_lang, in_code, in_unit_test_style, unit_test_model)\n", + " for in_output in in_gen:\n", + " yield in_output, \"\"\n", + " elif 'output' in unit_test_option:\n", + " out_gen = generate_unit_test_via_model(out_lang, out_code, out_unit_test_style, unit_test_model)\n", + " for out_output in out_gen:\n", + " yield \"\", out_output\n", + " elif 'both' in unit_test_option:\n", + " in_gen = generate_unit_test_via_model(in_lang, in_code, in_unit_test_style, unit_test_model)\n", + " out_gen = generate_unit_test_via_model(out_lang, out_code, out_unit_test_style, unit_test_model)\n", + " for in_output, out_output in zip(in_gen, out_gen):\n", + " yield in_output, out_output" + ] + }, + { + "cell_type": "markdown", + "id": "2a1f4d0c-f417-4de4-be9f-441cbe5a6db3", + "metadata": {}, + "source": [ + "## 4. 
Gradio UI part" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442", + "metadata": {}, + "outputs": [], + "source": [ + "LANGUAGE_INFO = {\n", + " \"Python\": {\n", + " \"doc_style\": [\"Google-style\", \"NumPy-style\", \"reST\", \"Doxygen\"],\n", + " \"unit_test_style\": [\"unittest\", \"pytest\", \"doctest\"]\n", + " },\n", + " \"Java\": {\n", + " \"doc_style\": [\"Javadoc\"],\n", + " \"unit_test_style\": [\"JUnit4\", \"JUnit5\", \"TestNG\"]\n", + " },\n", + " \"JavaScript\": {\n", + " \"doc_style\": [\"JSDoc\"],\n", + " \"unit_test_style\": [\"Jest\", \"Mocha + Chai\", \"Jasmine\"]\n", + " },\n", + " \"TypeScript\": {\n", + " \"doc_style\": [\"JSDoc\", \"TSDoc\"],\n", + " \"unit_test_style\": [\"Jest\", \"Mocha + Chai\", \"Vitest\"]\n", + " },\n", + " \"C\": {\n", + " \"doc_style\": [\"Doxygen\"],\n", + " \"unit_test_style\": [\"Google Test (gtest)\", \"CppUnit\", \"Catch2\"]\n", + " },\n", + " \"C++\": {\n", + " \"doc_style\": [\"Doxygen\"],\n", + " \"unit_test_style\": [\"Google Test (gtest)\", \"CppUnit\", \"Catch2\"]\n", + " },\n", + " \"C#\": {\n", + " \"doc_style\": [\"XML comments\"],\n", + " \"unit_test_style\": [\"xUnit\", \"NUnit\", \"MSTest\"]\n", + " },\n", + " \"Go\": {\n", + " \"doc_style\": [\"Godoc\"],\n", + " \"unit_test_style\": [\"Built-in testing package\"]\n", + " },\n", + " \"Rust\": {\n", + " \"doc_style\": [\"Rustdoc\", \"Markdown\"],\n", + " \"unit_test_style\": [\"Built-in #[test] annotation\"]\n", + " },\n", + " \"Kotlin\": {\n", + " \"doc_style\": [\"KDoc\"],\n", + " \"unit_test_style\": [\"JUnit\", \"Kotest\", \"Spek\"]\n", + " },\n", + " \"Swift\": {\n", + " \"doc_style\": [\"Mark-style comments\"],\n", + " \"unit_test_style\": [\"XCTest\"]\n", + " },\n", + " \"PHP\": {\n", + " \"doc_style\": [\"PHPDoc\"],\n", + " \"unit_test_style\": [\"PHPUnit\"]\n", + " },\n", + " \"Julia\": {\n", + " \"doc_style\": [\"Markdown\"],\n", + " \"unit_test_style\": [\"Built-in Test standard library\"]\n", + " }\n", + "}\n", + "LANGUAGES = list(LANGUAGE_INFO.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b50e7833-8f6f-407e-8174-37af9cec2030", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks(title=\"Power Coder\", theme=gr.themes.Citrus(), css=\"\"\"\n", + ".selected {\n", + " background-color: orange !important;\n", + " box-shadow: 0 4px 12px rgba(255, 140, 0, 0.5) !important;\n", + " color: black;\n", + "}\n", + ".unselected {\n", + " background-color: gray !important;\n", + " box-shadow: 0 4px 12px rgba(128, 128, 128, 0.4);\n", + " color: white;\n", + "}\n", + "\"\"\") as ui:\n", + " current_selected = gr.State(\"\")\n", + " initial_in_lang = \"Python\"\n", + " initial_out_lang = \"Java\"\n", + " in_comment_style_choices = [\"Standard\"] + LANGUAGE_INFO[initial_in_lang][\"doc_style\"]\n", + " out_comment_style_choices = [\"Standard\"] + LANGUAGE_INFO[initial_out_lang][\"doc_style\"]\n", + " in_unit_test_style_choices = [\"Standard\"] + LANGUAGE_INFO[initial_in_lang][\"unit_test_style\"]\n", + " out_unit_test_style_choices = [\"Standard\"] + LANGUAGE_INFO[initial_out_lang][\"unit_test_style\"]\n", + " in_code_file_name = gr.State(\"in_code.txt\")\n", + " out_code_file_name = gr.State(\"out_code.txt\")\n", + " in_comments_file_name = gr.State(\"in_comments.txt\")\n", + " out_comments_file_name = gr.State(\"out_comments.txt\")\n", + " in_unit_test_file_name = gr.State(\"in_unit_tests.txt\")\n", + " out_unit_test_file_name = gr.State(\"out_unit_tests.txt\")\n", + " \n", 
+ " \n", + " gr.Markdown(\"## Code Helper\")\n", + "\n", + " def load_file_content(file):\n", + " if file is None:\n", + " return \"\"\n", + " with open(file.name, \"r\", encoding=\"utf-8\") as f:\n", + " return f.read()\n", + "\n", + " def change_lang(lang):\n", + " comment_style_choices = [\"Standard\"] + LANGUAGE_INFO[lang][\"doc_style\"]\n", + " unit_test_style_choices = [\"Standard\"] + LANGUAGE_INFO[lang][\"unit_test_style\"]\n", + " return (\n", + " gr.update(choices=comment_style_choices, value=str(comment_style_choices[0])), \n", + " gr.update(choices=unit_test_style_choices, value=str(unit_test_style_choices[0]))\n", + " )\n", + "\n", + " def download_fn(in_text, out_text, in_file_name, out_file_name):\n", + " if in_text:\n", + " with open(in_file_name, \"w\") as f:\n", + " f.write(in_text)\n", + " if out_text:\n", + " with open(out_file_name, \"w\") as f:\n", + " f.write(out_text)\n", + " \n", + " # Conversion part\n", + " with gr.Row():\n", + " in_lang = gr.Dropdown(choices=LANGUAGES, label=\"Select input language\", value=initial_in_lang, interactive=True)\n", + " out_lang = gr.Dropdown(choices=LANGUAGES, label=\"Select output language\", value=initial_out_lang, interactive=True)\n", + " with gr.Row():\n", + " input_file = gr.File(label=\"Upload a source code file or input below\")\n", + " input_instruct = gr.Textbox(\n", + " label=\"Additional instruction(optional)\",\n", + " placeholder=\"Enter the instruction you want the ouput code to follow...\\n\\nFor example: Define the variable using snake_case style.\",\n", + " lines=8\n", + " )\n", + " with gr.Row():\n", + " in_code = gr.Textbox(label=\"Input Code:\", value=python_hard, lines=10)\n", + " out_code = gr.Textbox(label=\"Output Code:\", lines=10)\n", + " with gr.Row():\n", + " convert_model = gr.Dropdown([\"Claude\", \"GPT\", \"Gemini\"], label=\"Select model\", value=\"Claude\")\n", + " with gr.Row():\n", + " convert = gr.Button(\"Convert code\")\n", + " download_code = gr.Button(\"Download code\")\n", + "\n", + " gr.HTML(\"
\")\n", + "\n", + " def show_comment(current_selected):\n", + " if current_selected == \"comment\":\n", + " return (\n", + " gr.update(visible=False),\n", + " gr.update(visible=False),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " \"\"\n", + " )\n", + " else:\n", + " return (\n", + " gr.update(visible=True),\n", + " gr.update(visible=False),\n", + " gr.update(elem_classes=[\"selected\"]),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " \"comment\"\n", + " )\n", + "\n", + " def show_unit_test(current_selected):\n", + " if current_selected == \"unit_test\":\n", + " return (\n", + " gr.update(visible=False),\n", + " gr.update(visible=False),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " \"\"\n", + " )\n", + " else:\n", + " return (\n", + " gr.update(visible=False),\n", + " gr.update(visible=True),\n", + " gr.update(elem_classes=[\"unselected\"]),\n", + " gr.update(elem_classes=[\"selected\"]),\n", + " \"unit_test\"\n", + " )\n", + " \n", + " with gr.Blocks() as demo:\n", + " with gr.Row():\n", + " comment_show_up = gr.Button(\"Comment\", elem_id=\"comment-btn\", elem_classes=[\"unselected\"])\n", + " unit_test_show_up = gr.Button(\"Unit Test\", elem_id=\"unit-test-btn\", elem_classes=[\"unselected\"])\n", + " \n", + " comment_section = gr.Column(visible=False)\n", + " unit_test_section = gr.Column(visible=False)\n", + " \n", + " with comment_section:\n", + " # Comment section\n", + " with gr.Row():\n", + " comment_option = gr.Radio(\n", + " choices=[\n", + " \"Comment input code\",\n", + " \"Comment output code\",\n", + " \"Comment both\"\n", + " ],\n", + " label=\"Commenting Options\",\n", + " value=\"Comment input code\",\n", + " interactive=True\n", + " )\n", + " with gr.Row():\n", + " in_comment_style = gr.Dropdown(choices=in_comment_style_choices, label=\"Select comment style for input code\", value=in_comment_style_choices[0], interactive=True)\n", + " out_comment_style = gr.Dropdown(choices=out_comment_style_choices, label=\"Select comment style for oupt code\", value=out_comment_style_choices[0], interactive=True)\n", + " with gr.Row():\n", + " comment_model = gr.Dropdown([\"Claude\", \"GPT\", \"Gemini\"], label=\"Select model\", value=\"Claude\")\n", + " with gr.Row():\n", + " generate_comments = gr.Button(\"Generate comments\")\n", + " download_comments = gr.Button(\"Download comments\")\n", + " with gr.Row():\n", + " in_comments = gr.Textbox(label=\"Comments for Input Code:\", lines=10)\n", + " out_comments = gr.Textbox(label=\"Comments for Output Code:\", lines=10)\n", + " \n", + " with unit_test_section:\n", + " # Unit test part\n", + " with gr.Row():\n", + " unit_test_option = gr.Radio(\n", + " choices=[\n", + " \"Add unit test for input code\",\n", + " \"Add unit test for output code\",\n", + " \"Add unit test for both\"\n", + " ],\n", + " label=\"Unit Test Options\",\n", + " value=\"Add unit test for input code\",\n", + " interactive=True\n", + " )\n", + " with gr.Row():\n", + " in_unit_test_style = gr.Dropdown(choices=in_unit_test_style_choices, label=\"Select unit test style for input code\", value=in_unit_test_style_choices[0], interactive=True)\n", + " out_unit_test_style = gr.Dropdown(choices=out_unit_test_style_choices, label=\"Select unit test style for oupt code\", value=out_unit_test_style_choices[0], interactive=True)\n", + " with gr.Row():\n", + " unit_test_model = gr.Dropdown([\"Claude\", \"GPT\", \"Gemini\"], label=\"Select model\", 
value=\"Claude\")\n", + " with gr.Row():\n", + " generate_unit_test = gr.Button(\"Generate unit test\")\n", + " download_unit_test = gr.Button(\"Download unit text\")\n", + " with gr.Row():\n", + " in_unit_test = gr.Textbox(label=\"Unit Test for Input Code:\", lines=10)\n", + " out_unit_test = gr.Textbox(label=\"Unit Test for Output Code:\", lines=10)\n", + "\n", + " in_lang.change(fn=change_lang, inputs=in_lang, outputs=[in_comment_style, in_unit_test_style])\n", + " out_lang.change(fn=change_lang, inputs=out_lang, outputs=[out_comment_style, out_unit_test_style])\n", + " input_file.change(fn=load_file_content, inputs=input_file, outputs=in_code)\n", + " \n", + " convert.click(optimize, inputs=[in_lang, out_lang, in_code, input_instruct, convert_model], outputs=[out_code])\n", + " download_code.click(download_fn, inputs=[in_code, out_code, in_code_file_name, out_code_file_name])\n", + " \n", + " comment_show_up.click(fn=show_comment, inputs=current_selected, outputs=[comment_section, unit_test_section, comment_show_up, unit_test_show_up, current_selected])\n", + " unit_test_show_up.click(fn=show_unit_test, inputs=current_selected, outputs=[comment_section, unit_test_section, comment_show_up, unit_test_show_up, current_selected])\n", + "\n", + " generate_comments.click(generate_comments_fn, inputs=[comment_option, in_lang, out_lang, in_code, out_code, in_comment_style, out_comment_style, comment_model], outputs=[in_comments, out_comments])\n", + " download_comments.click(download_fn, inputs=[in_comments, out_comments, in_comments_file_name, out_comments_file_name])\n", + " generate_unit_test.click(generate_unit_test_fn, inputs=[unit_test_option, in_lang, out_lang, in_code, out_code, in_unit_test_style, out_unit_test_style, unit_test_model], outputs=[in_unit_test, out_unit_test])\n", + " download_unit_test.click(download_fn, inputs=[in_unit_test, out_unit_test, in_unit_test_file_name, out_unit_test_file_name])\n", + " \n", + "ui.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0266734c-0bee-46c0-9b17-9fd2ae86cc3a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 62ed15229635e4b105ba2e9450c039bd8e3f0f3b Mon Sep 17 00:00:00 2001 From: Vanshika Mahajan Date: Fri, 11 Jul 2025 16:57:48 +0530 Subject: [PATCH 22/25] Moved notebook to community/vanshika-mahajan folder as per guideline --- .../vanshika-mahajan/web_summary_fashion.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename web_summary_fashion.ipynb => community/vanshika-mahajan/web_summary_fashion.ipynb (100%) diff --git a/web_summary_fashion.ipynb b/community/vanshika-mahajan/web_summary_fashion.ipynb similarity index 100% rename from web_summary_fashion.ipynb rename to community/vanshika-mahajan/web_summary_fashion.ipynb From a10872469d67d3a5c208fdcff53b2e44cbd1d397 Mon Sep 17 00:00:00 2001 From: Edward Donner Date: Sat, 12 Jul 2025 15:26:07 -0400 Subject: [PATCH 23/25] Minor updates including pinning datasets version --- environment.yml | 5 +- extras/trading/prototype_trader.ipynb | 2 +- requirements.txt | 16 ++---- week2/day1.ipynb | 53 ++++++++++++++----- 
week2/day2.ipynb | 2 +- week2/day3.ipynb | 2 +- .../RAG_chat_no_LangChain.ipynb | 2 +- .../docuSeekAI/docuSeekAI.ipynb | 18 ++++++- week5/day1.ipynb | 2 +- week5/day4.5.ipynb | 16 +++++- week6/day1.ipynb | 16 ++++-- week6/day2.ipynb | 4 +- week6/day3.ipynb | 2 +- week6/day4.ipynb | 2 +- week6/lite.ipynb | 2 +- week8/day3.ipynb | 2 +- week8/day5.ipynb | 4 +- 17 files changed, 104 insertions(+), 46 deletions(-) diff --git a/environment.yml b/environment.yml index 56654d7..7f31d58 100644 --- a/environment.yml +++ b/environment.yml @@ -17,16 +17,13 @@ dependencies: - scikit-learn - chromadb - jupyter-dash - - sentencepiece - pyarrow - pip: - beautifulsoup4 - plotly - - bitsandbytes - transformers - sentence-transformers - - datasets - - accelerate + - datasets==3.6.0 - openai - anthropic - google-generativeai diff --git a/extras/trading/prototype_trader.ipynb b/extras/trading/prototype_trader.ipynb index 30358b9..1143a10 100644 --- a/extras/trading/prototype_trader.ipynb +++ b/extras/trading/prototype_trader.ipynb @@ -346,7 +346,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 0f5bd66..1615af4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,18 +14,15 @@ tqdm openai gradio langchain -tiktoken +langchain-core +langchain-text-splitters langchain-openai -langchain_experimental -langchain_chroma -langchain[docarray] -datasets -sentencepiece +langchain-chroma +langchain-community +datasets==3.6.0 matplotlib google-generativeai anthropic -scikit-learn -unstructured chromadb plotly jupyter-dash @@ -33,9 +30,6 @@ beautifulsoup4 pydub modal ollama -accelerate -sentencepiece -bitsandbytes psutil setuptools speedtest-cli diff --git a/week2/day1.ipynb b/week2/day1.ipynb index c6d98f4..4939f47 100644 --- a/week2/day1.ipynb +++ b/week2/day1.ipynb @@ -290,12 +290,12 @@ "metadata": {}, "outputs": [], "source": [ - "# If you have access to this, here is the reasoning model o3-mini\n", + "# If you have access to this, here is the reasoning model o4-mini\n", "# This is trained to think through its response before replying\n", "# So it will take longer but the answer should be more reasoned - not that this helps..\n", "\n", "completion = openai.chat.completions.create(\n", - " model='o3-mini',\n", + " model='o4-mini',\n", " messages=prompts\n", ")\n", "print(completion.choices[0].message.content)" @@ -308,12 +308,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Claude 3.7 Sonnet\n", + "# Claude 4.0 Sonnet\n", "# API needs system message provided separately from user prompt\n", "# Also adding max_tokens\n", "\n", "message = claude.messages.create(\n", - " model=\"claude-3-7-sonnet-latest\",\n", + " model=\"claude-sonnet-4-20250514\",\n", " max_tokens=200,\n", " temperature=0.7,\n", " system=system_message,\n", @@ -332,12 +332,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Claude 3.7 Sonnet again\n", + "# Claude 4.0 Sonnet again\n", "# Now let's add in streaming back results\n", "# If the streaming looks strange, then please see the note below this cell!\n", "\n", "result = claude.messages.stream(\n", - " model=\"claude-3-7-sonnet-latest\",\n", + " model=\"claude-sonnet-4-20250514\",\n", " max_tokens=200,\n", " temperature=0.7,\n", " system=system_message,\n", @@ -408,12 +408,28 @@ ")\n", "\n", "response = gemini_via_openai_client.chat.completions.create(\n", - " model=\"gemini-2.5-flash-preview-04-17\",\n", + " 
model=\"gemini-2.5-flash\",\n", " messages=prompts\n", ")\n", "print(response.choices[0].message.content)" ] }, + { + "cell_type": "markdown", + "id": "492f0ff2-8581-4836-bf00-37fddbe120eb", + "metadata": {}, + "source": [ + "# Sidenote:\n", + "\n", + "This alternative approach of using the client library from OpenAI to connect with other models has become extremely popular in recent months.\n", + "\n", + "So much so, that all the models now support this approach - including Anthropic.\n", + "\n", + "You can read more about this approach, with 4 examples, in the first section of this guide:\n", + "\n", + "https://github.com/ed-donner/agents/blob/main/guides/09_ai_apis_and_ollama.ipynb" + ] + }, { "cell_type": "markdown", "id": "33f70c88-7ca9-470b-ad55-d93a57dcc0ab", @@ -583,7 +599,7 @@ "# Have it stream back results in markdown\n", "\n", "stream = openai.chat.completions.create(\n", - " model='gpt-4o-mini',\n", + " model='gpt-4.1-mini',\n", " messages=prompts,\n", " temperature=0.7,\n", " stream=True\n", @@ -634,11 +650,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Let's make a conversation between GPT-4o-mini and Claude-3-haiku\n", + "# Let's make a conversation between GPT-4.1-mini and Claude-3.5-haiku\n", "# We're using cheap versions of models so the costs will be minimal\n", "\n", - "gpt_model = \"gpt-4o-mini\"\n", - "claude_model = \"claude-3-haiku-20240307\"\n", + "gpt_model = \"gpt-4.1-mini\"\n", + "claude_model = \"claude-3-5-haiku-latest\"\n", "\n", "gpt_system = \"You are a chatbot who is very argumentative; \\\n", "you disagree with anything in the conversation and you challenge everything, in a snarky way.\"\n", @@ -774,6 +790,19 @@ "\n", "Try creating a 3-way, perhaps bringing Gemini into the conversation! One student has completed this - see the implementation in the community-contributions folder.\n", "\n", + "The most reliable way to do this involves thinking a bit differently about your prompts: just 1 system prompt and 1 user prompt each time, and in the user prompt list the full conversation so far.\n", + "\n", + "Something like:\n", + "\n", + "```python\n", + "user_prompt = f\"\"\"\n", + " You are Alex, in conversation with Blake and Charlie.\n", + " The conversation so far is as follows:\n", + " {conversation}\n", + " Now with this, respond with what you would like to say next, as Alex.\n", + " \"\"\"\n", + "```\n", + "\n", "Try doing this yourself before you look at the solutions. 
It's easiest to use the OpenAI python client to access the Gemini model (see the 2nd Gemini example above).\n", "\n", "## Additional exercise\n", @@ -824,7 +853,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week2/day2.ipynb b/week2/day2.ipynb index 801bfe0..00a2840 100644 --- a/week2/day2.ipynb +++ b/week2/day2.ipynb @@ -568,7 +568,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week2/day3.ipynb b/week2/day3.ipynb index 2d955f5..9f044b7 100644 --- a/week2/day3.ipynb +++ b/week2/day3.ipynb @@ -301,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week5/community-contributions/day 4 no_langchain/RAG_chat_no_LangChain.ipynb b/week5/community-contributions/day 4 no_langchain/RAG_chat_no_LangChain.ipynb index 7c2572d..685f7fa 100644 --- a/week5/community-contributions/day 4 no_langchain/RAG_chat_no_LangChain.ipynb +++ b/week5/community-contributions/day 4 no_langchain/RAG_chat_no_LangChain.ipynb @@ -386,7 +386,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week5/community-contributions/docuSeekAI/docuSeekAI.ipynb b/week5/community-contributions/docuSeekAI/docuSeekAI.ipynb index fb49ebd..4f16577 100644 --- a/week5/community-contributions/docuSeekAI/docuSeekAI.ipynb +++ b/week5/community-contributions/docuSeekAI/docuSeekAI.ipynb @@ -92,10 +92,24 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/week5/day1.ipynb b/week5/day1.ipynb index 416a1a0..c9d82b0 100644 --- a/week5/day1.ipynb +++ b/week5/day1.ipynb @@ -256,7 +256,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week5/day4.5.ipynb b/week5/day4.5.ipynb index 2cad1ed..ea3518c 100644 --- a/week5/day4.5.ipynb +++ b/week5/day4.5.ipynb @@ -27,6 +27,20 @@ "import gradio as gr" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "94a564ed-5cda-42d9-aada-2a5e85d02d15", + "metadata": {}, + "outputs": [], + "source": [ + "# install faiss-cpu!\n", + "# Mac users - this may fail if you don't have a recent version of MacOS\n", + "# In which case I recommend you skip this lab -- FAISS is not essential! 
(Or upgrade MacOS if you wish..)\n", + "\n", + "!pip install faiss-cpu" + ] + }, { "cell_type": "code", "execution_count": null, @@ -400,7 +414,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week6/day1.ipynb b/week6/day1.ipynb index 3035814..8202845 100644 --- a/week6/day1.ipynb +++ b/week6/day1.ipynb @@ -102,6 +102,18 @@ "%matplotlib inline" ] }, + { + "cell_type": "markdown", + "id": "cd6d801e-d195-45fe-898e-495dbcb19d7d", + "metadata": {}, + "source": [ + "## Load our dataset\n", + "\n", + "In the next cell, we load in the dataset from huggingface.\n", + "\n", + "If this gives you an error like \"trust_remote_code is no longer supported\", then please run this command in a new cell: `!pip install datasets==3.6.0` and then restart the Kernel, and try again." + ] + }, { "cell_type": "code", "execution_count": null, @@ -109,8 +121,6 @@ "metadata": {}, "outputs": [], "source": [ - "# Load in our dataset\n", - "\n", "dataset = load_dataset(\"McAuley-Lab/Amazon-Reviews-2023\", f\"raw_meta_Appliances\", split=\"full\", trust_remote_code=True)" ] }, @@ -429,7 +439,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week6/day2.ipynb b/week6/day2.ipynb index 7eb5c8b..d179bff 100644 --- a/week6/day2.ipynb +++ b/week6/day2.ipynb @@ -119,7 +119,7 @@ "source": [ "# Load in the same dataset as last time\n", "\n", - "items = ItemLoader(\"Appliances\").load()" + "items = ItemLoader(\"Home_and_Kitchen\").load()" ] }, { @@ -624,7 +624,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week6/day3.ipynb b/week6/day3.ipynb index 9e51979..2170e9d 100644 --- a/week6/day3.ipynb +++ b/week6/day3.ipynb @@ -918,7 +918,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week6/day4.ipynb b/week6/day4.ipynb index cb7058f..56885b5 100644 --- a/week6/day4.ipynb +++ b/week6/day4.ipynb @@ -398,7 +398,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week6/lite.ipynb b/week6/lite.ipynb index 502d959..1a30deb 100644 --- a/week6/lite.ipynb +++ b/week6/lite.ipynb @@ -427,7 +427,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week8/day3.ipynb b/week8/day3.ipynb index 9188717..6f42c0e 100644 --- a/week8/day3.ipynb +++ b/week8/day3.ipynb @@ -227,7 +227,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week8/day5.ipynb b/week8/day5.ipynb index 5e11432..d42d181 100644 --- a/week8/day5.ipynb +++ b/week8/day5.ipynb @@ -171,7 +171,7 @@ " If you're not fed up of product prices yet 😂 I've built this out some more!
\n", " If you look in my repo
tech2ai, in segment3/lab1 is a neural network implementation of the pricer in pure PyTorch. It does pretty well..
\n", " And if you look in my repo Agentic in the workshop folder is the same Agent project taken further. There's a new version of the PlanningAgent called AutonomousPlanningAgent that uses multiple Tools, and a MessagingAgent that uses claude-3.7 to write texts. The AutonomousPlanningAgent uses the fantastic OpenAI Agents SDK and the mighty MCP protocol from Anthropic.
\n", - " If you're intrigued by Agents and MCP, and would like to learn more, then I also have a companion course called the Complete Agentic AI Engineering Course that might interest you (if you haven't had enough of me by now!!)\n", + " If you're intrigued by Agents and MCP, and would like to learn more, then I also have a companion course called the Complete Agentic AI Engineering Course that might interest you (if you haven't had enough of me by now!!), and also another course for leaders and founders looking to build a valuable business with LLMs.\n", " \n", " \n", " \n", @@ -223,7 +223,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4, From b3a20de60708c6411c844db3f70c360c4225cb15 Mon Sep 17 00:00:00 2001 From: Edward Donner Date: Sat, 12 Jul 2025 15:28:08 -0400 Subject: [PATCH 24/25] Moved a community contribution to the right place --- .../vanshika-mahajan/web_summary_fashion.ipynb | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {community => community-contributions}/vanshika-mahajan/web_summary_fashion.ipynb (100%) diff --git a/community/vanshika-mahajan/web_summary_fashion.ipynb b/community-contributions/vanshika-mahajan/web_summary_fashion.ipynb similarity index 100% rename from community/vanshika-mahajan/web_summary_fashion.ipynb rename to community-contributions/vanshika-mahajan/web_summary_fashion.ipynb From e6f086463a0e395119e9666c44db4e58fbe913e3 Mon Sep 17 00:00:00 2001 From: Edward Donner Date: Mon, 14 Jul 2025 14:19:34 -0400 Subject: [PATCH 25/25] Pinned protobuf version so that Chroma works --- environment.yml | 1 + requirements.txt | 1 + week2/day2.ipynb | 441 ++++++++++++++++++++++++++++++++++++++++++----- week5/day4.ipynb | 6 +- 4 files changed, 407 insertions(+), 42 deletions(-) diff --git a/environment.yml b/environment.yml index 7f31d58..470b64b 100644 --- a/environment.yml +++ b/environment.yml @@ -43,3 +43,4 @@ dependencies: - feedparser - twilio - pydub + - protobuf==3.20.2 diff --git a/requirements.txt b/requirements.txt index 1615af4..edcb3de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,3 +35,4 @@ setuptools speedtest-cli sentence_transformers feedparser +protobuf==3.20.2 diff --git a/week2/day2.ipynb b/week2/day2.ipynb index 00a2840..9954ea7 100644 --- a/week2/day2.ipynb +++ b/week2/day2.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c44c5494-950d-4d2f-8d4f-b87b57c5b330", "metadata": {}, "outputs": [], @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d1715421-cead-400b-99af-986388a97aff", "metadata": {}, "outputs": [], @@ -45,10 +45,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "337d5dfc-0181-4e3b-8ab9-e78e0c3f657b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenAI API Key exists and begins sk-proj-\n", + "Anthropic API Key exists and begins sk-ant-\n", + "Google API Key exists and begins AIzaSyA5\n" + ] + } + ], "source": [ "# Load environment variables in a file called .env\n", "# Print the key prefixes to help with any debugging\n", @@ -76,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "22586021-1795-4929-8079-63f5bb4edd4c", "metadata": {}, "outputs": [], @@ -92,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": 
"b16e6021-6dc4-4397-985a-6679d6c8ffd5", "metadata": {}, "outputs": [], @@ -104,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "02ef9b69-ef31-427d-86d0-b8c799e1c1b1", "metadata": {}, "outputs": [], @@ -125,10 +135,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "aef7d314-2b13-436b-b02d-8de3b72b193f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\"Today's date is October 10, 2023.\"" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# This can reveal the \"training cut off\", or the most recent date in the training data\n", "\n", @@ -145,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "bc664b7a-c01d-4fea-a1de-ae22cdd5141a", "metadata": {}, "outputs": [], @@ -159,20 +180,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "083ea451-d3a0-4d13-b599-93ed49b975e4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shout has been called with input hello\n" + ] + }, + { + "data": { + "text/plain": [ + "'HELLO'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "shout(\"hello\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "08f1f15a-122e-4502-b112-6ee2817dda32", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7860\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The simplicty of gradio. This might appear in \"light mode\" - I'll show you how to make this in dark mode later.\n", "\n", @@ -181,10 +249,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "c9a359a4-685c-4c99-891c-bb4d1cb7f426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7861\n", + "* Running on public URL: https://c1f6ab5bdc2722c539.gradio.live\n", + "\n", + "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Adding share=True means that it can be accessed publically\n", "# A more permanent hosting is available using a platform called Spaces from HuggingFace, which we will touch on next week\n", @@ -195,10 +294,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "cd87533a-ff3a-4188-8998-5bedd5ba2da3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7862\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Adding inbrowser=True opens up a new browser window automatically\n", "\n", @@ -217,10 +345,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "e8129afa-532b-4b15-b93c-aa9cca23a546", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7863\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Define this variable and then pass js=force_dark_mode when creating the Interface\n", "\n", @@ -238,10 +395,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "3cc67b26-dd5f-406d-88f6-2306ee2950c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7865\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Inputs and Outputs\n", "\n", @@ -256,10 +442,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "f235288e-63a2-4341-935b-1441f9be969b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7866\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# And now - changing the function from \"shout\" to \"message_gpt\"\n", "\n", @@ -274,10 +489,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "af9a3262-e626-4e4b-80b0-aca152405e63", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7867\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Let's use Markdown\n", "# Are you wondering why it makes any difference to set system_message when it's not referred to in the code below it?\n", @@ -297,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "88c04ebf-0671-4fea-95c9-bc1565d4bb4f", "metadata": {}, "outputs": [], @@ -324,10 +568,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "0bb1f789-ff11-4cba-ac67-11b815e29d09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7868\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "view = gr.Interface(\n", " fn=stream_gpt,\n", @@ -340,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "bbc8e930-ba2a-4194-8f7c-044659150626", "metadata": {}, "outputs": [], @@ -364,10 +637,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "a0066ffd-196e-4eaf-ad1e-d492958b62af", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7869\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "view = gr.Interface(\n", " fn=stream_claude,\n", @@ -403,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "0087623a-4e31-470b-b2e6-d8d16fc7bcf5", "metadata": {}, "outputs": [], @@ -420,10 +722,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "8d8ce810-997c-4b6a-bc4f-1fc847ac8855", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7870\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "view = gr.Interface(\n", " fn=stream_model,\n", @@ -466,7 +797,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "1626eb2e-eee8-4183-bda5-1591b58ae3cf", "metadata": {}, "outputs": [], @@ -494,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "c701ec17-ecd5-4000-9f68-34634c8ed49d", "metadata": {}, "outputs": [], @@ -507,12 +838,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "5def90e0-4343-4f58-9d4a-0e36e445efa4", "metadata": {}, "outputs": [], "source": [ "def stream_brochure(company_name, url, model):\n", + " yield \"\"\n", " prompt = f\"Please generate a company brochure for {company_name}. Here is their landing page:\\n\"\n", " prompt += Website(url).get_contents()\n", " if model==\"GPT\":\n", @@ -526,10 +858,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "66399365-5d67-4984-9d47-93ed26c0bd3d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7873\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "view = gr.Interface(\n", " fn=stream_brochure,\n", diff --git a/week5/day4.ipynb b/week5/day4.ipynb index d12a111..dd67e99 100644 --- a/week5/day4.ipynb +++ b/week5/day4.ipynb @@ -33,7 +33,9 @@ "cell_type": "code", "execution_count": null, "id": "802137aa-8a74-45e0-a487-d1974927d7ca", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# imports for langchain\n", @@ -434,7 +436,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.11.13" } }, "nbformat": 4,