Merge branch 'main' of github.com:ed-donner/llm_engineering

This commit is contained in:
Edward Donner
2024-12-22 10:30:57 +00:00
17 changed files with 4806 additions and 2 deletions

View File

@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "a60e0f78-4637-4318-9ab6-309c3f7f2799",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"\n",
"load_dotenv()\n",
"\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"if openai_api_key:\n",
" print(\"API Key set\")\n",
"else:\n",
" print(\"OpenAI API Key not set\")\n",
" \n",
"MODEL = \"gpt-4o-mini\"\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67026ef0-23be-4101-9371-b11f96f505bf",
"metadata": {},
"outputs": [],
"source": [
"# TTS\n",
"\n",
"from pydub import AudioSegment\n",
"import os\n",
"import subprocess\n",
"from io import BytesIO\n",
"import tempfile\n",
"\n",
"# Set custom temp directory\n",
"custom_temp_dir = r\"D:\\projects\\llm_engineering-main\\temp\"\n",
"os.makedirs(custom_temp_dir, exist_ok=True)\n",
"\n",
"# Explicitly set FFmpeg paths\n",
"AudioSegment.converter = r\"D:\\Anaconda3\\envs\\llms\\Library\\bin\\ffmpeg.exe\"\n",
"AudioSegment.ffprobe = r\"D:\\Anaconda3\\envs\\llms\\Library\\bin\\ffprobe.exe\"\n",
"\n",
"def play_audio_with_ffplay(audio_segment, temp_dir):\n",
" # Explicitly create and manage a temporary file\n",
" temp_file_path = os.path.join(temp_dir, \"temp_output.wav\")\n",
" \n",
" # Export the audio to the temporary file\n",
" audio_segment.export(temp_file_path, format=\"wav\")\n",
" \n",
" try:\n",
" # Play the audio using ffplay\n",
" subprocess.call([\"ffplay\", \"-nodisp\", \"-autoexit\", temp_file_path])\n",
" finally:\n",
" # Clean up the temporary file after playback\n",
" if os.path.exists(temp_file_path):\n",
" os.remove(temp_file_path)\n",
"\n",
"def talker(message):\n",
" # Mocked OpenAI response for testing\n",
" response = openai.audio.speech.create(\n",
" model=\"tts-1\",\n",
" voice=\"nova\",\n",
" input=message\n",
" )\n",
" \n",
" # Handle audio stream\n",
" audio_stream = BytesIO(response.content)\n",
" audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
" \n",
" # Play the audio\n",
" play_audio_with_ffplay(audio, custom_temp_dir)"
]
},
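{
"cell_type": "markdown",
"id": "7f3a2b1c-5d44-4c1e-9a77-2f8c0d9e4a01",
"metadata": {},
"source": [
"The FFmpeg paths above are hardcoded for one Windows machine. As a more portable alternative - a minimal sketch, assuming `ffmpeg` and `ffprobe` are on your PATH - `shutil.which` can locate the binaries at runtime:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a4b3c2d-6e55-4d2f-8b88-3a9d1e0f5b02",
"metadata": {},
"outputs": [],
"source": [
"# Portable FFmpeg discovery - a sketch, assuming the binaries are on PATH\n",
"# Falls back to the explicit paths set above if nothing is found\n",
"import shutil\n",
"\n",
"ffmpeg_path = shutil.which(\"ffmpeg\")\n",
"ffprobe_path = shutil.which(\"ffprobe\")\n",
"\n",
"if ffmpeg_path and ffprobe_path:\n",
"    AudioSegment.converter = ffmpeg_path\n",
"    AudioSegment.ffprobe = ffprobe_path\n",
"    print(f\"Using FFmpeg at {ffmpeg_path}\")\n",
"else:\n",
"    print(\"ffmpeg/ffprobe not found on PATH; keeping the explicit paths above\")"
]
},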
{
"cell_type": "code",
"execution_count": null,
"id": "12c66b44-293a-4bf9-b81e-0f6905fbf607",
"metadata": {},
"outputs": [],
"source": [
"# STT Whisper\n",
"\n",
"import whisper\n",
"import sounddevice as sd\n",
"import numpy as np\n",
"from scipy.io.wavfile import write\n",
"\n",
"def record_audio(temp_dir, duration=5, samplerate=16000, device_id=2):\n",
" # print(f\"Recording for {duration} seconds...\")\n",
" sd.default.device = (device_id, None)\n",
" audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype=\"int16\")\n",
" sd.wait() # Wait until the recording is finished\n",
" \n",
" audio_path = os.path.join(temp_dir, \"mic_input.wav\")\n",
" write(audio_path, samplerate, audio)\n",
" # print(f\"Audio recorded and saved to {audio_path}\")\n",
"\n",
" return audio_path\n",
"\n",
"\n",
"whisper_model = whisper.load_model(\"base\")\n",
"def transcribe_audio(audio_path): \n",
" # print(\"Transcribing audio...\")\n",
" result = whisper_model.transcribe(audio_path, language=\"en\")\n",
" return result[\"text\"]\n",
"\n",
"def mic_to_text():\n",
" audio_path = record_audio(custom_temp_dir, duration=10)\n",
" transcription = transcribe_audio(audio_path)\n",
" # print(f\"Transcription: {transcription}\")\n",
" return transcription"
]
},
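{
"cell_type": "markdown",
"id": "9b5c4d3e-7f66-4e30-9c99-4b0e2f1a6c03",
"metadata": {},
"source": [
"`record_audio` defaults to `device_id=2`, which is machine-specific. A quick sketch using `sounddevice` to list the input devices available on your machine, so you can pick the right id:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c6d5e4f-8a77-4f41-8daa-5c1f3a2b7d04",
"metadata": {},
"outputs": [],
"source": [
"# List audio input devices so you can choose the right device_id - a sketch\n",
"import sounddevice as sd\n",
"\n",
"for idx, device in enumerate(sd.query_devices()):\n",
"    if device[\"max_input_channels\"] > 0:  # only devices that can record\n",
"        print(f\"{idx}: {device['name']}\")"
]
},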
{
"cell_type": "code",
"execution_count": null,
"id": "0156c106-1844-444a-9a22-88c3475805d9",
"metadata": {},
"outputs": [],
"source": [
"# Chat Functions\n",
"\n",
"import requests\n",
"history = [{\"role\": \"system\", \"content\": \"You are Nova the friendly robot. Reply within couple of sentences.\"}]\n",
"\n",
"def run_chat():\n",
" running = True\n",
" while running:\n",
" input_text = input(\"press Enter to talk\") \n",
" user_input = input_text if input_text.strip() else mic_to_text()\n",
" running = False if input_text == \"bye\" or user_input.strip() == \"bye\" else True\n",
" print(f\"\\nYou: {user_input}\\n\\n\")\n",
" history.append({\"role\": \"user\", \"content\": user_input}) \n",
" api_run = requests.post(\n",
" \"http://localhost:11434/api/chat\", \n",
" json={\n",
" \"model\": \"llama3.2\",\n",
" \"messages\": history,\n",
" \"stream\": False\n",
" }, \n",
" headers={\"Content-Type\": \"application/json\"}\n",
" )\n",
" output_message = api_run.json()['message']['content']\n",
" print(f\"Nova: {output_message}\\n\\n\") \n",
" talker(output_message)\n",
" history.append({\"role\": \"assistant\", \"content\": output_message})"
]
},
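{
"cell_type": "markdown",
"id": "1d7e6f5a-9b88-4a52-8ebb-6d2a4b3c8e05",
"metadata": {},
"source": [
"`run_chat` assumes an Ollama server is listening on `localhost:11434`. A small pre-flight sketch, assuming the standard Ollama REST endpoint `/api/tags`, that checks the server is up and shows which models are available locally:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e8f7a6b-0c99-4b63-9fcc-7e3b5c4d9f06",
"metadata": {},
"outputs": [],
"source": [
"# Pre-flight check for the local Ollama server - a sketch\n",
"# GET /api/tags lists the models pulled locally\n",
"import requests\n",
"\n",
"try:\n",
"    tags = requests.get(\"http://localhost:11434/api/tags\", timeout=5).json()\n",
"    models = [m[\"name\"] for m in tags.get(\"models\", [])]\n",
"    print(\"Ollama is running; local models:\", models)\n",
"except requests.exceptions.ConnectionError:\n",
"    print(\"Ollama server not reachable - start it with `ollama serve`\")"
]
},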
{
"cell_type": "code",
"execution_count": null,
"id": "de61b54e-387e-4480-a592-c78e3245ddde",
"metadata": {},
"outputs": [],
"source": [
"run_chat()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce16bee7-6ea6-46d5-a407-385e6ae31db8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,355 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "75e2ef28-594f-4c18-9d22-c6b8cd40ead2",
"metadata": {},
"source": [
"# Day 3 - Conversational AI - aka Chatbot!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "70e39cd8-ec79-4e3e-9c26-5659d42d0861",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "231605aa-fccb-447e-89cf-8b187444536a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API Key exists and begins sk-proj-\n",
"Anthropic API Key exists and begins sk-ant-\n",
"Google API Key exists and begins AIzaSyA-\n"
]
}
],
"source": [
"# Load environment variables in a file called .env\n",
"# Print the key prefixes to help with any debugging\n",
"\n",
"load_dotenv()\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
"\n",
"if openai_api_key:\n",
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
" print(\"OpenAI API Key not set\")\n",
" \n",
"if anthropic_api_key:\n",
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
"else:\n",
" print(\"Anthropic API Key not set\")\n",
"\n",
"if google_api_key:\n",
" print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
"else:\n",
" print(\"Google API Key not set\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6541d58e-2297-4de1-b1f7-77da1b98b8bb",
"metadata": {},
"outputs": [],
"source": [
"# Initialize\n",
"\n",
"openai = OpenAI()\n",
"MODEL = 'gpt-4o-mini'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e16839b5-c03b-4d9d-add6-87a0f6f37575",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are a helpful assistant\""
]
},
{
"cell_type": "markdown",
"id": "98e97227-f162-4d1a-a0b2-345ff248cbe7",
"metadata": {},
"source": [
"# Please read this! A change from the video:\n",
"\n",
"In the video, I explain how we now need to write a function called:\n",
"\n",
"`chat(message, history)`\n",
"\n",
"Which expects to receive `history` in a particular format, which we need to map to the OpenAI format before we call OpenAI:\n",
"\n",
"```\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message here\"},\n",
" {\"role\": \"user\", \"content\": \"first user prompt here\"},\n",
" {\"role\": \"assistant\", \"content\": \"the assistant's response\"},\n",
" {\"role\": \"user\", \"content\": \"the new user prompt\"},\n",
"]\n",
"```\n",
"\n",
"But Gradio has been upgraded! Now it will pass in `history` in the exact OpenAI format, perfect for us to send straight to OpenAI.\n",
"\n",
"So our work just got easier!\n",
"\n",
"We will write a function `chat(message, history)` where: \n",
"**message** is the prompt to use \n",
"**history** is the past conversation, in OpenAI format \n",
"\n",
"We will combine the system message, history and latest message, then call OpenAI."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1eacc8a4-4b48-4358-9e06-ce0020041bc1",
"metadata": {},
"outputs": [],
"source": [
"# Simpler than in my video - we can easily create this function that calls OpenAI\n",
"# It's now just 1 line of code to prepare the input to OpenAI!\n",
"\n",
"def chat(message, history):\n",
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
" print(\"History is:\")\n",
" print(history)\n",
" print(\"And messages is:\")\n",
" print(messages)\n",
"\n",
" stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)\n",
"\n",
" response = \"\"\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" yield response"
]
},
{
"cell_type": "markdown",
"id": "1334422a-808f-4147-9c4c-57d63d9780d0",
"metadata": {},
"source": [
"## And then enter Gradio's magic!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0866ca56-100a-44ab-8bd0-1568feaf6bf2",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1f91b414-8bab-472d-b9c9-3fa51259bdfe",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are a helpful assistant in a clothes store. You should try to gently encourage \\\n",
"the customer to try items that are on sale. Hats are 60% off, and most other items are 50% off. \\\n",
"For example, if the customer says 'I'm looking to buy a hat', \\\n",
"you could reply something like, 'Wonderful - we have lots of hats - including several that are part of our sales evemt.'\\\n",
"Encourage the customer to buy hats if they are unsure what to get.\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4e5be3ec-c26c-42bc-ac16-c39d369883f6",
"metadata": {},
"outputs": [],
"source": [
"def chat(message, history):\n",
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
" stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)\n",
"\n",
" response = \"\"\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" yield response"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "413e9e4e-7836-43ac-a0c3-e1ab5ed6b136",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d75f0ffa-55c8-4152-b451-945021676837",
"metadata": {},
"outputs": [],
"source": [
"system_message += \"\\nIf the customer asks for shoes, you should respond that shoes are not on sale today, \\\n",
"but remind the customer to look at hats!\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c602a8dd-2df7-4eb7-b539-4e01865a6351",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0a987a66-1061-46d6-a83a-a30859dc88bf",
"metadata": {},
"outputs": [],
"source": [
"# Fixed a bug in this function brilliantly identified by student Gabor M.!\n",
"# I've also improved the structure of this function\n",
"# Paul Goodwin added \"Buy One get one free offer\" for a bit of fun\n",
"\n",
"def chat(message, history):\n",
"\n",
" relevant_system_message = system_message\n",
" keywords = ['discount', 'offer', 'promotion'] # Define words that imply customer is looking for a better deal\n",
"\n",
" if 'belt' in message.strip().lower():\n",
" relevant_system_message += (\n",
" \" The store does not sell belts; if you are asked for belts, be sure to point out other items on sale.\"\n",
" )\n",
" elif any(word in message.strip().lower() for word in keywords): # Use elif for clarity\n",
" relevant_system_message += (\n",
" \" If the customer asks for more money off the selling price, the store is currently running 'buy 2 get one free' campaign, so be sure to mention this.\"\n",
" )\n",
"\n",
" messages = [{\"role\": \"system\", \"content\": relevant_system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
" stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)\n",
"\n",
" response = \"\"\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" yield response"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "20570de2-eaad-42cc-a92c-c779d71b48b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7862\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "markdown",
"id": "82a57ee0-b945-48a7-a024-01b56a5d4b3e",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business Applications</h2>\n",
" <span style=\"color:#181;\">Conversational Assistants are of course a hugely common use case for Gen AI, and the latest frontier models are remarkably good at nuanced conversation. And Gradio makes it easy to have a user interface. Another crucial skill we covered is how to use prompting to provide context, information and examples.\n",
"<br/><br/>\n",
"Consider how you could apply an AI Assistant to your business, and make yourself a prototype. Use the system prompt to give context on your business, and set the tone for the LLM.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6dfb9e21-df67-4c2b-b952-5e7e7961b03d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,475 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ad900e1c-b4a9-4f05-93d5-e364fae208dd",
"metadata": {},
"source": [
"# Multimodal Expert Tutor\n",
"\n",
"An AI assistant which leverages expertise from other sources for you.\n",
"\n",
"Features:\n",
"- Multimodal\n",
"- Uses tools\n",
"- Streams responses\n",
"- Reads out the responses after streaming\n",
"- Coverts voice to text during input\n",
"\n",
"Scope for Improvement\n",
"- Read response faster (as streaming starts)\n",
"- code optimization\n",
"- UI enhancements\n",
"- Make it more real time"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import gradio as gr\n",
"import google.generativeai\n",
"import anthropic"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_CLAUDE = 'claude-3-5-sonnet-20240620'\n",
"MODEL_GEMINI = 'gemini-1.5-flash'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"\n",
"load_dotenv()\n",
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
"os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY', 'your-key-if-not-using-env')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6fd8538-0be6-4539-8add-00e42133a641",
"metadata": {},
"outputs": [],
"source": [
"# Connect to OpenAI, Anthropic and Google\n",
"\n",
"openai = OpenAI()\n",
"\n",
"claude = anthropic.Anthropic()\n",
"\n",
"google.generativeai.configure()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "852faee9-79aa-4741-a676-4f5145ccccdc",
"metadata": {},
"outputs": [],
"source": [
"import tempfile\n",
"import subprocess\n",
"from io import BytesIO\n",
"from pydub import AudioSegment\n",
"import time\n",
"\n",
"def play_audio(audio_segment):\n",
" temp_dir = tempfile.gettempdir()\n",
" temp_path = os.path.join(temp_dir, \"temp_audio.wav\")\n",
" try:\n",
" audio_segment.export(temp_path, format=\"wav\")\n",
" subprocess.call([\n",
" \"ffplay\",\n",
" \"-nodisp\",\n",
" \"-autoexit\",\n",
" \"-hide_banner\",\n",
" temp_path\n",
" ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n",
" finally:\n",
" try:\n",
" os.remove(temp_path)\n",
" except Exception:\n",
" pass\n",
" \n",
"def talker(message):\n",
" response = openai.audio.speech.create(\n",
" model=\"tts-1\",\n",
" voice=\"onyx\", # Also, try replacing onyx with alloy\n",
" input=message\n",
" )\n",
" audio_stream = BytesIO(response.content)\n",
" audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
" play_audio(audio)\n",
"\n",
"talker(\"Well hi there\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8595807b-8ae2-4e1b-95d9-e8532142e8bb",
"metadata": {},
"outputs": [],
"source": [
"# prompts\n",
"general_prompt = \"Please be as technical as possible with your answers.\\\n",
"Only answer questions about topics you have expertise in.\\\n",
"If you do not know something say so.\"\n",
"\n",
"additional_prompt_gpt = \"Analyze the user query and determine if the content is primarily related to \\\n",
"coding, software engineering, data science and LLMs. \\\n",
"If so please answer it yourself else if it is primarily related to \\\n",
"physics, chemistry or biology get answers from tool ask_gemini or \\\n",
"if it belongs to subject related to finance, business or economics get answers from tool ask_claude.\"\n",
"\n",
"system_prompt_gpt = \"You are a helpful technical tutor who is an expert in \\\n",
"coding, software engineering, data science and LLMs.\"+ additional_prompt_gpt + general_prompt\n",
"system_prompt_gemini = \"You are a helpful technical tutor who is an expert in physics, chemistry and biology.\" + general_prompt\n",
"system_prompt_claude = \"You are a helpful technical tutor who is an expert in finance, business and economics.\" + general_prompt\n",
"\n",
"def get_user_prompt(question):\n",
" return \"Please give a detailed explanation to the following question: \" + question"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24d4a313-60b0-4696-b455-6cfef95ad2fe",
"metadata": {},
"outputs": [],
"source": [
"def call_claude(question):\n",
" result = claude.messages.create(\n",
" model=MODEL_CLAUDE,\n",
" max_tokens=200,\n",
" temperature=0.7,\n",
" system=system_prompt_claude,\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": get_user_prompt(question)},\n",
" ],\n",
" )\n",
" \n",
" return result.content[0].text"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd5d5345-54ab-470b-9b5b-5611a7981458",
"metadata": {},
"outputs": [],
"source": [
"def call_gemini(question):\n",
" gemini = google.generativeai.GenerativeModel(\n",
" model_name=MODEL_GEMINI,\n",
" system_instruction=system_prompt_gemini\n",
" )\n",
" response = gemini.generate_content(get_user_prompt(question))\n",
" response = response.text\n",
" return response"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f74da8f-56d1-405e-bc81-040f5428d296",
"metadata": {},
"outputs": [],
"source": [
"# tools and functions\n",
"\n",
"def ask_claude(question):\n",
" print(f\"Tool ask_claude called for {question}\")\n",
" return call_claude(question)\n",
"def ask_gemini(question):\n",
" print(f\"Tool ask_gemini called for {question}\")\n",
" return call_gemini(question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c469304d-99b4-42ee-ab02-c9216b61594b",
"metadata": {},
"outputs": [],
"source": [
"ask_claude_function = {\n",
" \"name\": \"ask_claude\",\n",
" \"description\": \"Get the answer to the question related to a topic this agent is faimiliar with. Call this whenever you need to answer something related to finance, marketing, sales or business in general.For example 'What is gross margin' or 'Explain stock market'\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"question_for_topic\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The question which is related to finance, business or economics.\",\n",
" },\n",
" },\n",
" \"required\": [\"question_for_topic\"],\n",
" \"additionalProperties\": False\n",
" }\n",
"}\n",
"\n",
"ask_gemini_function = {\n",
" \"name\": \"ask_gemini\",\n",
" \"description\": \"Get the answer to the question related to a topic this agent is faimiliar with. Call this whenever you need to answer something related to physics, chemistry or biology.Few examples: 'What is gravity','How do rockets work?', 'What is ATP'\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"question_for_topic\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The question which is related to physics, chemistry or biology\",\n",
" },\n",
" },\n",
" \"required\": [\"question_for_topic\"],\n",
" \"additionalProperties\": False\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73a60096-c49b-401f-bfd3-d1d40f4563d2",
"metadata": {},
"outputs": [],
"source": [
"tools = [{\"type\": \"function\", \"function\": ask_claude_function},\n",
" {\"type\": \"function\", \"function\": ask_gemini_function}]\n",
"tools_functions_map = {\n",
" \"ask_claude\":ask_claude,\n",
" \"ask_gemini\":ask_gemini\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d54e758-42b2-42f2-a8eb-49c35d44acc6",
"metadata": {},
"outputs": [],
"source": [
"def chat(history):\n",
" messages = [{\"role\": \"system\", \"content\": system_prompt_gpt}] + history\n",
" stream = openai.chat.completions.create(model=MODEL_GPT, messages=messages, tools=tools, stream=True)\n",
" \n",
" full_response = \"\"\n",
" history += [{\"role\":\"assistant\", \"content\":full_response}]\n",
" \n",
" tool_call_accumulator = \"\" # Accumulator for JSON fragments of tool call arguments\n",
" tool_call_id = None # Current tool call ID\n",
" tool_call_function_name = None # Function name\n",
" tool_calls = [] # List to store complete tool calls\n",
"\n",
" for chunk in stream:\n",
" if chunk.choices[0].delta.content:\n",
" full_response += chunk.choices[0].delta.content or \"\"\n",
" history[-1]['content']=full_response\n",
" yield history\n",
" \n",
" if chunk.choices[0].delta.tool_calls:\n",
" message = chunk.choices[0].delta\n",
" for tc in chunk.choices[0].delta.tool_calls:\n",
" if tc.id: # New tool call detected here\n",
" tool_call_id = tc.id\n",
" if tool_call_function_name is None:\n",
" tool_call_function_name = tc.function.name\n",
" \n",
" tool_call_accumulator += tc.function.arguments if tc.function.arguments else \"\"\n",
" \n",
" # When the accumulated JSON string seems complete then:\n",
" try:\n",
" func_args = json.loads(tool_call_accumulator)\n",
" \n",
" # Handle tool call and get response\n",
" tool_response, tool_call = handle_tool_call(tool_call_function_name, func_args, tool_call_id)\n",
" \n",
" tool_calls.append(tool_call)\n",
"\n",
" # Add tool call and tool response to messages this is required by openAI api\n",
" messages.append({\n",
" \"role\": \"assistant\",\n",
" \"tool_calls\": tool_calls\n",
" })\n",
" messages.append(tool_response)\n",
" \n",
" # Create new response with full context\n",
" response = openai.chat.completions.create(\n",
" model=MODEL_GPT, \n",
" messages=messages, \n",
" stream=True\n",
" )\n",
" \n",
" # Reset and accumulate new full response\n",
" full_response = \"\"\n",
" for chunk in response:\n",
" if chunk.choices[0].delta.content:\n",
" full_response += chunk.choices[0].delta.content or \"\"\n",
" history[-1]['content'] = full_response\n",
" yield history\n",
" \n",
" # Reset tool call accumulator and related variables\n",
" tool_call_accumulator = \"\"\n",
" tool_call_id = None\n",
" tool_call_function_name = None\n",
" tool_calls = []\n",
"\n",
" except json.JSONDecodeError:\n",
" # Incomplete JSON; continue accumulating\n",
" pass\n",
"\n",
" # trigger text-to-audio once full response available\n",
" talker(full_response)"
]
},
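{
"cell_type": "markdown",
"id": "3f9a8b7c-1daa-4c74-8add-8f4c6d5e0a07",
"metadata": {},
"source": [
"The streaming loop above reassembles tool-call arguments from JSON fragments: it keeps appending until `json.loads` succeeds. A self-contained sketch of that accumulation pattern (the fragments below are made up for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a0b9c8d-2ebb-4d85-9bee-9a5d7e6f1b08",
"metadata": {},
"outputs": [],
"source": [
"# Illustration of the JSON-fragment accumulation pattern used in chat() - a sketch\n",
"import json\n",
"\n",
"fragments = ['{\"question', '_for_topic\": ', '\"What is gravity?\"}']  # made-up stream chunks\n",
"accumulator = \"\"\n",
"for fragment in fragments:\n",
"    accumulator += fragment\n",
"    try:\n",
"        args = json.loads(accumulator)  # succeeds only when the JSON is complete\n",
"        print(\"Complete arguments:\", args)\n",
"        accumulator = \"\"\n",
"    except json.JSONDecodeError:\n",
"        pass  # incomplete JSON; keep accumulating"
]
},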
{
"cell_type": "code",
"execution_count": null,
"id": "515d3774-cc2c-44cd-af9b-768a63ed90dc",
"metadata": {},
"outputs": [],
"source": [
"# We have to write that function handle_tool_call:\n",
"def handle_tool_call(function_name, arguments, tool_call_id):\n",
" question = arguments.get('question_for_topic')\n",
" \n",
" # Prepare tool call information\n",
" tool_call = {\n",
" \"id\": tool_call_id,\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": function_name,\n",
" \"arguments\": json.dumps(arguments)\n",
" }\n",
" }\n",
" \n",
" if function_name in tools_functions_map:\n",
" answer = tools_functions_map[function_name](question)\n",
" response = {\n",
" \"role\": \"tool\",\n",
" \"content\": json.dumps({\"question\": question, \"answer\" : answer}),\n",
" \"tool_call_id\": tool_call_id\n",
" }\n",
"\n",
" return response, tool_call"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d7cc622-8635-4693-afa3-b5bcc2f9a63d",
"metadata": {},
"outputs": [],
"source": [
"def transcribe_audio(audio_file_path):\n",
" try:\n",
" audio_file = open(audio_file_path, \"rb\")\n",
" response = openai.audio.transcriptions.create(model=\"whisper-1\", file=audio_file) \n",
" return response.text\n",
" except Exception as e:\n",
" return f\"An error occurred: {e}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ded9b3f-83e1-4971-9714-4894f2982b5a",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"with gr.Blocks() as ui:\n",
" with gr.Row():\n",
" chatbot = gr.Chatbot(height=500, type=\"messages\", label=\"Multimodal Technical Expert Chatbot\")\n",
" with gr.Row():\n",
" entry = gr.Textbox(label=\"Ask our technical expert anything:\")\n",
" audio_input = gr.Audio(\n",
" sources=\"microphone\", \n",
" type=\"filepath\",\n",
" label=\"Record audio\",\n",
" editable=False,\n",
" waveform_options=gr.WaveformOptions(\n",
" show_recording_waveform=False,\n",
" ),\n",
" )\n",
"\n",
" # Add event listener for audio stop recording and show text on input area\n",
" audio_input.stop_recording(\n",
" fn=transcribe_audio, \n",
" inputs=audio_input, \n",
" outputs=entry\n",
" )\n",
" \n",
" with gr.Row():\n",
" clear = gr.Button(\"Clear\")\n",
"\n",
" def do_entry(message, history):\n",
" history += [{\"role\":\"user\", \"content\":message}]\n",
" yield \"\", history\n",
" \n",
" entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry,chatbot]).then(\n",
" chat, inputs=chatbot, outputs=chatbot)\n",
" \n",
" clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
"\n",
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "532cb948-7733-4323-b85f-febfe2631e66",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -214,7 +214,7 @@
" response = {\n",
" \"role\": \"tool\",\n",
" \"content\": json.dumps({\"destination_city\": city,\"price\": price}),\n",
" \"tool_call_id\": message.tool_calls[0].id\n",
" \"tool_call_id\": tool_call.id\n",
" }\n",
" return response, city"
]