Merge pull request #311 from KhadatkarSameer/community-contributions-branch

Added my contributions to community-contributions
2025-04-11 18:46:47 -04:00
parent ef948f4cff b31cad470b
commit abb462e53c
1 changed files with 326 additions and 0 deletions
--- a/week2/community-contributions/Airlines_Chatbot_with_Audio_Input.ipynb
+++ b/week2/community-contributions/Airlines_Chatbot_with_Audio_Input.ipynb
@@ -0,0 +1,326 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
   "metadata": {},
   "source": [
    "# Project to take Audio Input to the Airlines ChatBot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "import gradio as gr\n",
    "import base64\n",
    "from io import BytesIO\n",
    "from PIL import Image\n",
    "from IPython.display import Audio, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e2315a3-f80c-4d3f-8073-f5b61d709564",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialization\n",
    "\n",
    "load_dotenv(override=True)\n",
    "\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "if openai_api_key:\n",
    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"OpenAI API Key not set\")\n",
    "    \n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40da9de1-b350-49de-8acd-052f40ce5611",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_message = \"You are a helpful assistant for an Airline called FlightAI. \"\n",
    "system_message += \"Give short, courteous answers, no more than 1 sentence. \"\n",
    "system_message += \"Always be accurate. If you don't know the answer, say so.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5537635c-a60d-4983-8018-375c6a912e19",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's start by making a useful function\n",
    "\n",
    "ticket_prices = {\"london\": \"$799\", \"paris\": \"$899\", \"tokyo\": \"$1400\", \"berlin\": \"$499\"}\n",
    "\n",
    "def get_ticket_price(destination_city):\n",
    "    print(f\"Tool get_ticket_price called for {destination_city}\")\n",
    "    city = destination_city.lower()\n",
    "    return ticket_prices.get(city, \"Unknown\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7132dd0-8788-4885-a415-d59664f68fd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# There's a particular dictionary structure that's required to describe our function:\n",
    "\n",
    "price_function = {\n",
    "    \"name\": \"get_ticket_price\",\n",
    "    \"description\": \"Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"destination_city\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The city that the customer wants to travel to\",\n",
    "            },\n",
    "        },\n",
    "        \"required\": [\"destination_city\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7703ca0c-5da4-4641-bcb1-7727d1b2f2bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# And this is included in a list of tools:\n",
    "\n",
    "tools = [{\"type\": \"function\", \"function\": price_function}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29ce724b-d998-4c3f-bc40-6b8576c0fd34",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We have to write that function handle_tool_call:\n",
    "\n",
    "def handle_tool_call(message):\n",
    "    tool_call = message.tool_calls[0]\n",
    "    arguments = json.loads(tool_call.function.arguments)\n",
    "    city = arguments.get('destination_city')\n",
    "    price = get_ticket_price(city)\n",
    "    response = {\n",
    "        \"role\": \"tool\",\n",
    "        \"content\": json.dumps({\"destination_city\": city,\"price\": price}),\n",
    "        \"tool_call_id\": tool_call.id\n",
    "    }\n",
    "    return response, city"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "931d0565-b01d-4aa8-bd18-72bafff8fb3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def artist(city):\n",
    "    image_response = openai.images.generate(\n",
    "            model=\"dall-e-3\",\n",
    "            prompt=f\"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style\",\n",
    "            size=\"1024x1024\",\n",
    "            n=1,\n",
    "            response_format=\"b64_json\",\n",
    "        )\n",
    "    image_base64 = image_response.data[0].b64_json\n",
    "    image_data = base64.b64decode(image_base64)\n",
    "    return Image.open(BytesIO(image_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa165f7f-9796-4513-b923-2fa0b0b9ddd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "from io import BytesIO\n",
    "from PIL import Image\n",
    "from IPython.display import Audio, display\n",
    "\n",
    "def talker(message):\n",
    "    response = openai.audio.speech.create(\n",
    "        model=\"tts-1\",\n",
    "        voice=\"onyx\",\n",
    "        input=message)\n",
    "\n",
    "    audio_stream = BytesIO(response.content)\n",
    "    output_filename = \"output_audio.mp3\"\n",
    "    with open(output_filename, \"wb\") as f:\n",
    "        f.write(audio_stream.read())\n",
    "\n",
    "    # Play the generated audio\n",
    "    display(Audio(output_filename, autoplay=True))\n",
    "\n",
    "talker(\"Well, hi there\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b512d4ff-0f7b-4148-b161-4ee0ebf14776",
   "metadata": {},
   "outputs": [],
   "source": [
    "def transcribe_audio(audio_file):\n",
    "    with open(audio_file, \"rb\") as f:\n",
    "        transcript = openai.audio.transcriptions.create(\n",
    "            model=\"whisper-1\",\n",
    "            file=f\n",
    "        )\n",
    "    return transcript.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3852570-fb26-4507-a001-f50fd94b7655",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Translate between languages using GPT\n",
    "def translate(text, source_lang, target_lang):\n",
    "    translation_prompt = (\n",
    "        f\"Translate the following text from {source_lang} to {target_lang}:\\n\\n{text}\"\n",
    "    )\n",
    "    response = openai.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        messages=[{\"role\": \"user\", \"content\": translation_prompt}]\n",
    "    )\n",
    "    return response.choices[0].message.content.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d75abc2-870e-48af-a8fe-8dd463418b3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chatbot logic: handle both text and audio input\n",
    "def chatbot_dual(history):\n",
    "    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
    "    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
    "    image = None\n",
    "    \n",
    "    if response.choices[0].finish_reason==\"tool_calls\":\n",
    "        message = response.choices[0].message\n",
    "        response, city = handle_tool_call(message)\n",
    "        messages.append(message)\n",
    "        messages.append(response)\n",
    "        image = artist(city)\n",
    "        response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
    "        \n",
    "    reply = response.choices[0].message.content\n",
    "    history += [{\"role\":\"assistant\", \"content\":reply}]\n",
    "\n",
    "    # Comment out or delete the next line if you'd rather skip Audio for now..\n",
    "    # audio_response = talker(reply)\n",
    "    talker(reply)\n",
    "    return history, image# Chatbot logic here — replace with real logic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "512fec09-c2f7-4847-817b-bc20f8b30319",
   "metadata": {},
   "outputs": [],
   "source": [
    "# More involved Gradio code as we're not using the preset Chat interface!\n",
    "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
    "\n",
    "with gr.Blocks() as ui:\n",
    "    with gr.Row():\n",
    "        chatbot = gr.Chatbot(height=500, type=\"messages\")\n",
    "        image_output = gr.Image(height=500)\n",
    "\n",
    "    with gr.Row():\n",
    "        text_input = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
    "        audio_input = gr.Audio(sources=\"microphone\", type=\"filepath\", label=\"Or speak to the assistant\")\n",
    "\n",
    "    with gr.Row():\n",
    "        # voice_output = gr.Audio(label=\"Bot Voice Reply\", autoplay=True)\n",
    "        clear = gr.Button(\"Clear\")\n",
    "\n",
    "    def do_entry(message, audio, history):\n",
    "        if message:\n",
    "            history += [{\"role\":\"user\", \"content\":message}]\n",
    "        if audio:\n",
    "            history += [{\"role\":\"user\", \"content\":transcribe_audio(audio)}]\n",
    "        return \"\", None, history\n",
    "\n",
    "    text_input.submit(do_entry, inputs=[text_input, audio_input, chatbot], outputs=[text_input, audio_input, chatbot]).then(chatbot_dual, inputs=chatbot, outputs=[chatbot, image_output]\n",
    "    )\n",
    "\n",
    "    audio_input.change(do_entry, inputs=[text_input, audio_input, chatbot], outputs=[text_input, audio_input, chatbot]).then(chatbot_dual, inputs=chatbot, outputs=[chatbot, image_output]\n",
    "    )\n",
    "\n",
    "    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e1294e2-caf0-4f0f-b09e-b0d52c8ca6ec",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }