{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ddfa9ae6-69fe-444a-b994-8c4c5970a7ec",
   "metadata": {},
   "source": [
    "# Project - Airline AI Assistant\n",
    "\n",
    "We'll now bring together what we've learned to make an AI Customer Support assistant for an Airline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
"import os, json, gradio as gr, anthropic, google.generativeai\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from openai import OpenAI"
|
|
]
|
|
},
|
|
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialization\n",
    "\n",
    "load_dotenv(override=True)\n",
    "\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "if openai_api_key:\n",
    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"OpenAI API Key not set\")\n",
    "\n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI()\n",
    "\n",
    "# Other LLMs\n",
    "DALL_E_MODEL = \"dall-e-3\"\n",
    "\n",
    "CLAUDE_MODEL = \"claude-sonnet-4-20250514\"\n",
    "claude = anthropic.Anthropic()\n",
    "\n",
    "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
    "if google_api_key:\n",
    "    print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"Google API Key not set\")\n",
    "\n",
"GEMINI_MODEL= \"gemini-2.5-flash\"\n",
|
|
"gemini = google.generativeai.configure()"
|
|
]
|
|
},
|
|
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a521d84-d07c-49ab-a0df-d6451499ed97",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_message = \"You are a helpful assistant for an Airline called FlightAI. \"\n",
    "system_message += \"Give short, courteous answers, no more than 1 sentence. \"\n",
    "system_message += \"Always be accurate. If you don't know the answer, say so.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61a2a15d-b559-4844-b377-6bd5cb4949f6",
   "metadata": {},
   "outputs": [],
   "source": [
"# Just take in history\n",
|
|
"def chat(history):\n",
|
|
" message = history[-1][\"content\"] # Get the last message from the user\n",
|
|
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
|
|
"\n",
|
|
" if response.choices[0].finish_reason==\"tool_calls\":\n",
|
|
" message = response.choices[0].message\n",
|
|
" response_tool, city = handle_tool_call(message)\n",
|
|
" messages.append(message)\n",
|
|
" messages.append(response_tool)\n",
|
|
" image = artist(city)\n",
|
|
" print(\"Avail image for: \", city)\n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
|
|
"\n",
|
|
"\n",
|
|
" # After getting the final response from OpenAI\n",
|
|
" final_response_content = response.choices[0].message.content\n",
|
|
" history.append({\"role\": \"assistant\", \"content\": final_response_content})\n",
|
|
"\n",
|
|
" # The return value should be a tuple of (history, image)\n",
|
|
" return history, image\n",
|
|
"\n",
|
|
"# gr.ChatInterface(fn=chat, type=\"messages\").launch()"
|
|
]
|
|
},
|
|
  {
   "cell_type": "markdown",
   "id": "36bedabf-a0a7-4985-ad8e-07ed6a55a3a4",
   "metadata": {},
   "source": [
    "## Tools\n",
    "\n",
    "Tools are an incredibly powerful feature provided by the frontier LLMs.\n",
    "\n",
    "With tools, you can write a function, and have the LLM call that function as part of its response.\n",
    "\n",
    "Sounds almost spooky... we're giving it the power to run code on our machine?\n",
    "\n",
    "Well, kinda."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0696acb1-0b05-4dc2-80d5-771be04f1fb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's start by making a useful function\n",
    "\n",
    "ticket_prices = {\"london\": \"$799\", \"paris\": \"$899\", \"tokyo\": \"$1400\", \"berlin\": \"$499\"}\n",
    "\n",
    "def get_ticket_price(destination_city):\n",
    "    print(f\"Tool get_ticket_price called for {destination_city}\")\n",
    "    city = destination_city.lower()\n",
    "    return ticket_prices.get(city, \"Unknown\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80ca4e09-6287-4d3f-997d-fa6afbcf6c85",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get_ticket_price(\"London\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4afceded-7178-4c05-8fa6-9f2085e6a344",
   "metadata": {},
   "outputs": [],
   "source": [
    "# There's a particular dictionary structure that's required to describe our function:\n",
    "\n",
    "price_function = {\n",
    "    \"name\": \"get_ticket_price\",\n",
    "    \"description\": \"Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"destination_city\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The city that the customer wants to travel to\",\n",
    "            },\n",
    "        },\n",
    "        \"required\": [\"destination_city\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# And this is included in a list of tools:\n",
    "\n",
    "tools = [{\"type\": \"function\", \"function\": price_function}]\n",
    "# print(tools)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83070cc0-b213-4309-8040-b0cc8390b64b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate the booking process by simply returning a confirmation string.\n",
    "\n",
    "def book_flight(destination_city, number_of_passengers, booking_date):\n",
    "    \"\"\"\n",
    "    Simulates booking a flight.\n",
    "    \"\"\"\n",
    "    print(f\"Tool book_flight called for {destination_city} for {number_of_passengers} passengers on {booking_date}\")\n",
    "    return f\"Your booking to {destination_city} for {number_of_passengers} passengers on {booking_date} has been confirmed. Your booking reference is BKG-{hash(destination_city + str(number_of_passengers) + str(booking_date))}\""
   ]
  },
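  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c1f2a6d-3e84-4b27-b5f0-62d9a47c81e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick sanity check (uncomment to try). One caveat worth knowing: the BKG- reference will differ\n",
    "# on every run, because Python salts str hashes per process - fine for a simulation like this one,\n",
    "# but a real system would need a stable booking reference.\n",
    "\n",
    "# book_flight(\"Paris\", 2, \"2025-12-01\")"
   ]
  },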
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "781786f0-7106-4b10-89d7-453a0d10d204",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tool definition for book_flight\n",
    "\n",
    "booking_function = {\n",
    "    \"name\": \"book_flight\",\n",
    "    \"description\": \"Books a flight for a customer. Call this whenever a customer asks to book a flight.\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"destination_city\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The city the customer wants to fly to.\"\n",
    "            },\n",
    "            \"number_of_passengers\": {\n",
    "                \"type\": \"integer\",\n",
    "                \"description\": \"The number of passengers for the booking.\"\n",
    "            },\n",
    "            \"booking_date\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The date of the flight booking in YYYY-MM-DD format.\"\n",
    "            }\n",
    "        },\n",
    "        \"required\": [\"destination_city\", \"number_of_passengers\", \"booking_date\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3fc237c-9721-4fee-a56b-2ff12fc98e27",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the new booking_function to the existing tools list.\n",
    "\n",
    "tools.append({\"type\": \"function\", \"function\": booking_function})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a482eb03-188a-4526-8acf-3a1fe96aaaf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# To translate to a given language\n",
    "\n",
    "def translate_text(text, target_language):\n",
    "    \"\"\"\n",
    "    Translates text to a specified language.\n",
    "\n",
    "    Args:\n",
    "        text (str): The text to translate.\n",
    "        target_language (str): The language to translate the text into.\n",
    "\n",
    "    Returns:\n",
    "        str: The translated text or an error message.\n",
    "    \"\"\"\n",
    "    print(f\"Tool translate_text called to translate to {target_language}\")\n",
    "\n",
    "    # Use a system prompt to instruct the model to perform a translation\n",
    "    system_prompt_for_language = f\"You are a helpful translation assistant. Translate the following text into {target_language}. Only provide the translated text without any additional conversational text.\"\n",
    "\n",
    "    try:\n",
    "        # # Using OpenAI\n",
    "        # response = openai.chat.completions.create(\n",
    "        #     model=MODEL,\n",
    "        #     messages=[\n",
    "        #         {\"role\": \"system\", \"content\": system_prompt_for_language},\n",
    "        #         {\"role\": \"user\", \"content\": text}\n",
    "        #     ],\n",
    "        # )\n",
    "        # result = response.choices[0].message.content\n",
    "        # return result\n",
    "\n",
    "        # # Using Gemini\n",
    "        # gemini = google.generativeai.GenerativeModel(\n",
    "        #     model_name=GEMINI_MODEL,\n",
    "        #     system_instruction=system_prompt_for_language\n",
    "        # )\n",
    "        # response = gemini.generate_content(text)\n",
    "        # result = response.text\n",
    "        # return result\n",
    "\n",
    "        # Using Claude\n",
    "        response = claude.messages.create(\n",
    "            model=CLAUDE_MODEL,\n",
    "            max_tokens=200,\n",
    "            temperature=0.7,\n",
    "            system=system_prompt_for_language,\n",
    "            messages=[\n",
    "                {\"role\": \"user\", \"content\": text},\n",
    "            ],\n",
    "        )\n",
    "        result = response.content[0].text\n",
    "        return result\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"Error during translation: {e}\")\n",
    "        return \"Sorry, I encountered an error and could not complete the translation.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "756e9859-94bc-4cef-bbc7-070d8ef6164b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tool definition for translate_text\n",
    "\n",
    "translation_function = {\n",
    "    \"name\": \"translate_text\",\n",
    "    \"description\": \"Translates a given text to a specified target language. Call this whenever a customer asks for a translation.\",\n",
    "    \"parameters\": {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"text\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The text to be translated.\"\n",
    "            },\n",
    "            \"target_language\": {\n",
    "                \"type\": \"string\",\n",
    "                \"description\": \"The language to translate the text into (e.g., 'French', 'Spanish', 'Swahili').\"\n",
    "            }\n",
    "        },\n",
    "        \"required\": [\"text\", \"target_language\"],\n",
    "        \"additionalProperties\": False\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5444455e-6e5c-4ef6-bd39-5ff01731dd4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Integrate the tool\n",
    "\n",
    "tools.append({\"type\": \"function\", \"function\": translation_function})"
   ]
  },
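  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d8e2f1a-7b36-49c0-8a5e-93f1d2c4b6a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick check of the translation helper (uncomment to try - note this makes a real Claude API call):\n",
    "\n",
    "# translate_text(\"Hello, how can I help you today?\", \"French\")"
   ]
  },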
  {
   "cell_type": "markdown",
   "id": "c3d3554f-b4e3-4ce7-af6f-68faa6dd2340",
   "metadata": {},
   "source": [
    "## Getting OpenAI to use our Tool\n",
    "\n",
    "There's some fiddly stuff to allow OpenAI \"to call our tool\"\n",
    "\n",
    "What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.\n",
    "\n",
    "Here's how the new chat function looks:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce9b0744-9c78-408d-b9df-9f6fd9ed78cf",
   "metadata": {},
   "outputs": [],
   "source": [
"def chat(message, history):\n",
|
|
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
|
|
"\n",
|
|
" if response.choices[0].finish_reason==\"tool_calls\":\n",
|
|
" message = response.choices[0].message\n",
|
|
" response, city = handle_tool_call(message)\n",
|
|
" messages.append(message)\n",
|
|
" messages.append(response)\n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
|
|
" \n",
|
|
" return response.choices[0].message.content"
|
|
]
|
|
},
|
|
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0992986-ea09-4912-a076-8e5603ee631f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We have to write that function handle_tool_call:\n",
    "\n",
    "# Dispatch to whichever tool the model asked for (note: only the first tool call is handled)\n",
    "def handle_tool_call(message):\n",
    "    tool_call = message.tool_calls[0]\n",
    "    function_name = tool_call.function.name\n",
    "    arguments = json.loads(tool_call.function.arguments)\n",
    "\n",
    "    destination_city = None\n",
    "\n",
    "    if function_name == \"get_ticket_price\":\n",
    "        city = arguments.get('destination_city')\n",
    "        price = get_ticket_price(city)\n",
    "        response_content = json.dumps({\"destination_city\": city, \"price\": price})\n",
    "        destination_city = city\n",
    "    elif function_name == \"book_flight\":\n",
    "        destination_city = arguments.get('destination_city')\n",
    "        number_of_passengers = arguments.get('number_of_passengers')\n",
    "        booking_date = arguments.get('booking_date')\n",
    "        confirmation = book_flight(destination_city, number_of_passengers, booking_date)\n",
    "        response_content = json.dumps({\"confirmation_message\": confirmation})\n",
    "    elif function_name == \"translate_text\":\n",
    "        text = arguments.get('text')\n",
    "        target_language = arguments.get('target_language')\n",
    "        translated_text = translate_text(text, target_language)\n",
    "        response_content = json.dumps({\"translated_text\": translated_text})\n",
    "    else:\n",
    "        response_content = json.dumps({\"error\": f\"Unknown tool: {function_name}\"})\n",
    "\n",
    "    response = {\n",
    "        \"role\": \"tool\",\n",
    "        \"content\": response_content,\n",
    "        \"tool_call_id\": tool_call.id\n",
    "    }\n",
    "    return response, destination_city"
   ]
  },
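  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f3c2a7e-5d21-4b8e-9c4d-7a1e2f3b4c5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional aside: handle_tool_call only looks at message.tool_calls[0], but the model is allowed to\n",
    "# request several tool calls in one response. Here's a minimal sketch of a loop version (not wired into\n",
    "# the chat functions below) - every tool_call_id must get its own role=\"tool\" reply before the next API call:\n",
    "\n",
    "def handle_all_tool_calls(message):\n",
    "    responses = []\n",
    "    city = None\n",
    "    for tool_call in message.tool_calls:\n",
    "        name = tool_call.function.name\n",
    "        arguments = json.loads(tool_call.function.arguments)\n",
    "        if name == \"get_ticket_price\":\n",
    "            city = arguments.get('destination_city')\n",
    "            content = json.dumps({\"destination_city\": city, \"price\": get_ticket_price(city)})\n",
    "        elif name == \"book_flight\":\n",
    "            confirmation = book_flight(arguments.get('destination_city'), arguments.get('number_of_passengers'), arguments.get('booking_date'))\n",
    "            content = json.dumps({\"confirmation_message\": confirmation})\n",
    "        elif name == \"translate_text\":\n",
    "            content = json.dumps({\"translated_text\": translate_text(arguments.get('text'), arguments.get('target_language'))})\n",
    "        else:\n",
    "            content = json.dumps({\"error\": f\"Unknown tool: {name}\"})\n",
    "        responses.append({\"role\": \"tool\", \"content\": content, \"tool_call_id\": tool_call.id})\n",
    "    return responses, city"
   ]
  },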
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4be8a71-b19e-4c2f-80df-f59ff2661f14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# gr.ChatInterface(fn=chat, type=\"messages\").launch()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "473e5b39-da8f-4db1-83ae-dbaca2e9531e",
   "metadata": {},
   "source": [
    "# Let's go multi-modal!!\n",
    "\n",
    "We can use DALL-E-3, OpenAI's image generation model, to make us some images\n",
    "\n",
    "Let's put this in a function called artist.\n",
    "\n",
    "### Price alert: each time I generate an image it costs about 4 cents - don't go crazy with images!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c27c4ba-8ed5-492f-add1-02ce9c81d34c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Some imports for handling images\n",
    "\n",
    "import base64\n",
    "from io import BytesIO\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "773a9f11-557e-43c9-ad50-56cbec3a0f8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def artist(city):\n",
    "    image_response = openai.images.generate(\n",
    "        model=DALL_E_MODEL,\n",
    "        prompt=f\"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style\",\n",
    "        size=\"1024x1024\",\n",
    "        n=1,\n",
    "        response_format=\"b64_json\",\n",
    "    )\n",
    "    image_base64 = image_response.data[0].b64_json\n",
    "    image_data = base64.b64decode(image_base64)\n",
    "    return Image.open(BytesIO(image_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d877c453-e7fb-482a-88aa-1a03f976b9e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# image = artist(\"New York City\")\n",
    "# display(image)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6dd849b5-31ae-4237-9072-46b210792bf9",
   "metadata": {},
   "source": [
    "## Audio (NOTE - Audio is optional for this course - feel free to skip Audio if it causes trouble!)\n",
    "\n",
"And let's make a function talker that uses OpenAI's speech model to generate Audio\n",
|
|
"\n",
|
|
"### Troubleshooting Audio issues\n",
|
|
"\n",
|
|
"If you have any problems running this code below (like a FileNotFound error, or a warning of a missing package), you may need to install FFmpeg, a very popular audio utility.\n",
|
|
"\n",
|
|
"**For Mac Users**\n",
|
|
"\n",
|
|
"1. Install homebrew if you don't have it already by running this in a Terminal window and following any instructions: \n",
|
|
"`/bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\"`\n",
|
|
"\n",
|
|
"2. Then install FFmpeg with `brew install ffmpeg`\n",
|
|
"\n",
|
|
"3. Verify your installation with `ffmpeg -version` and if everything is good, within Jupyter Lab do Kernel -> Restart kernel to pick up the changes\n",
|
|
"\n",
|
|
"Message me or email me at ed@edwarddonner.com with any problems!"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4cc90e80-c96e-4dd4-b9d6-386fe2b7e797",
|
|
"metadata": {},
|
|
"source": [
|
|
"## To check you now have ffmpeg and can access it here\n",
|
|
"\n",
|
|
"Excecute the next cell to see if you get a version number. (Putting an exclamation mark before something in Jupyter Lab tells it to run it as a terminal command rather than python code).\n",
|
|
"\n",
|
|
"If this doesn't work, you may need to actually save and close down your Jupyter lab, and start it again from a new Terminal window (Mac) or Anaconda prompt (PC), remembering to activate the llms environment. This ensures you pick up ffmpeg.\n",
|
|
"\n",
|
|
"And if that doesn't work, please contact me!"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7b3be0fb-1d34-4693-ab6f-dbff190afcd7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!ffmpeg -version\n",
|
|
"!ffprobe -version\n",
|
|
"!ffplay -version"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d91d3f8f-e505-4e3c-a87c-9e42ed823db6",
|
|
"metadata": {},
|
|
"source": [
|
|
"# For Mac users - and possibly many PC users too\n",
|
|
"\n",
|
|
"This version should work fine for you. It might work for Windows users too, but you might get a Permissions error writing to a temp file. If so, see the next section!\n",
|
|
"\n",
|
|
"As always, if you have problems, please contact me! (You could also comment out the audio talker() in the later code if you're less interested in audio generation)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ffbfe93b-5e86-4e68-ba71-b301cd5230db",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pydub import AudioSegment\n",
|
|
"from pydub.playback import play\n",
|
|
"\n",
|
|
"def talker(message):\n",
|
|
" response = openai.audio.speech.create(\n",
|
|
" model=\"tts-1\",\n",
|
|
" voice=\"onyx\", # Also, try replacing onyx with alloy\n",
|
|
" input=message\n",
|
|
" )\n",
|
|
" \n",
|
|
" audio_stream = BytesIO(response.content)\n",
|
|
" audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
|
|
" play(audio)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b88d775d-d357-4292-a1ad-5dc5ed567281",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# talker(\"Well, hi there\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e927f333-7ed5-4625-9e5a-5e0b62f8a684",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# To transcribe an audio prompt/input\n",
|
|
"\n",
|
|
"import tempfile\n",
|
|
"from pydub import AudioSegment\n",
|
|
"from pydub.playback import play\n",
|
|
"\n",
|
|
"def transcribe_audio(audio_file):\n",
|
|
" \"\"\"\n",
|
|
" Transcribes an audio file using OpenAI's Whisper model.\n",
|
|
" \"\"\"\n",
|
|
" if audio_file is None:\n",
|
|
" return \"\"\n",
|
|
" \n",
|
|
" # The Gradio Audio component returns a tuple (sample_rate, numpy_array)\n",
|
|
" # We need to save this to a file to pass to the OpenAI API\n",
|
|
" with tempfile.NamedTemporaryFile(suffix=\".wav\", delete=True) as tmpfile:\n",
|
|
" audio = AudioSegment.from_file(audio_file, format=\"wav\")\n",
|
|
" audio.export(tmpfile.name, format=\"wav\")\n",
|
|
" \n",
|
|
" with open(tmpfile.name, \"rb\") as audio_file_obj:\n",
|
|
" transcript = openai.audio.transcriptions.create(\n",
|
|
" model=\"whisper-1\", \n",
|
|
" file=audio_file_obj\n",
|
|
" )\n",
|
|
" return transcript.text"
|
|
]
|
|
},
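  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e9d4c2b-8f6a-4e0d-a1b3-c5d7e9f0a2b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick check (uncomment to try) - pass any WAV recording you have to hand;\n",
    "# the filename below is just a placeholder\n",
    "\n",
    "# transcribe_audio(\"my_recording.wav\")"
   ]
  },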
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f38d0d27-33bf-4992-a2e5-5dbed973cde7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# More involved Gradio code as we're not using the preset Chat interface!\n",
    "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
    "\n",
    "with gr.Blocks() as ui:\n",
    "    with gr.Row():\n",
    "        chatbot = gr.Chatbot(height=500)\n",
    "        image = gr.Image(height=500)\n",
    "    with gr.Row():\n",
    "        # entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
    "        entry = gr.Textbox(label=\"Chat with our AI Assistant:\", scale=4)\n",
    "        submit_btn = gr.Button(\"Submit\", scale=1)\n",
    "    with gr.Row():\n",
    "        # Provide a microphone input\n",
    "        audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Speak to our AI Assistant\", scale=4)\n",
    "        submit_audio_btn = gr.Button(\"Submit Audio\", scale=1)\n",
    "\n",
    "    with gr.Row():\n",
    "        languages = [\"English\", \"Swahili\", \"French\", \"Chinese\", \"German\"]\n",
    "        language_dropdown = gr.Dropdown(\n",
    "            label=\"Select a language for translation\",\n",
    "            choices=languages,\n",
    "            value=languages[0]  # Default to English\n",
    "        )\n",
    "\n",
    "        audio_options = [\"Yes\", \"No\"]\n",
    "        audio_dropdown = gr.Dropdown(\n",
    "            label=\"Select whether to respond with audio\",\n",
    "            choices=audio_options,\n",
    "            value=audio_options[1]  # Default to No\n",
    "        )\n",
    "\n",
    "    with gr.Row():\n",
    "        clear = gr.Button(\"Clear\")\n",
    "\n",
    "    def user_message_updater(user_message, history):\n",
    "        return \"\", history + [[user_message, None]]\n",
    "\n",
    "    def chat_with_assistant(history, target_language, use_audio_output):\n",
" message = history[-1][0] # Get the user's message from the last list in history\n",
|
|
" \n",
|
|
" messages = [{\"role\": \"system\", \"content\": system_message}]\n",
|
|
" for msg_user, msg_assistant in history:\n",
|
|
" messages.append({\"role\": \"user\", \"content\": msg_user})\n",
|
|
" if msg_assistant:\n",
|
|
" messages.append({\"role\": \"assistant\", \"content\": msg_assistant})\n",
|
|
" \n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
|
|
"\n",
|
|
" image = None\n",
|
|
" \n",
|
|
" if response.choices[0].finish_reason==\"tool_calls\":\n",
|
|
" message = response.choices[0].message\n",
|
|
" response_tool, city = handle_tool_call(message)\n",
|
|
"\n",
|
|
" # Check if a city was returned from the tool call to generate an image\n",
|
|
" if city:\n",
|
|
" image = artist(city) # Generate an image to represent the target City\n",
|
|
"\n",
|
|
" messages.append(message.model_dump()) # Append message as a dictionary using .model_dump()\n",
|
|
" messages.append(response_tool)\n",
|
|
" \n",
|
|
" response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
|
|
" \n",
|
|
" \n",
|
|
" final_response_content = response.choices[0].message.content\n",
|
|
" history[-1][1] = final_response_content # Update the last message with the assistant's reply\n",
|
|
"\n",
|
|
" if target_language != \"English\": # Assuming \"English\" is the default and no translation is needed\n",
|
|
" translated_response = translate_text(final_response_content, target_language)\n",
|
|
" final_response_content = translated_response\n",
|
|
"\n",
|
|
" history[-1][1] = final_response_content\n",
|
|
"\n",
|
|
" if use_audio_output != \"No\":\n",
|
|
" talker(final_response_content)\n",
|
|
"\n",
|
|
" return history, image # Return a tuple of (the updated history, an image)\n",
|
|
"\n",
|
|
" # This function ties together the transcription and the chat logic\n",
|
|
" def transcribe_and_chat(audio_file, history, target_language, use_audio_output):\n",
|
|
" if audio_file:\n",
|
|
" # Transcribe the audio file to text\n",
|
|
" transcribed_text = transcribe_audio(audio_file)\n",
|
|
" \n",
|
|
" # Update history with the transcribed text\n",
|
|
" new_history = history + [[transcribed_text, None]]\n",
|
|
" \n",
|
|
" # Call the main chat function with the new history\n",
|
|
" return chat_with_assistant(new_history, target_language, use_audio_output)\n",
|
|
" else:\n",
|
|
" return history, None\n",
|
|
"\n",
|
|
" # The event listeners are updated to be triggered by both the textbox and the new button\n",
|
|
" entry.submit(\n",
|
|
" user_message_updater,\n",
|
|
" inputs=[entry, chatbot],\n",
|
|
" outputs=[entry, chatbot],\n",
|
|
" queue=False\n",
|
|
" ).then(\n",
|
|
" chat_with_assistant, \n",
|
|
" inputs=[chatbot, language_dropdown, audio_dropdown],\n",
|
|
" outputs=[chatbot, image]\n",
|
|
" )\n",
|
|
"\n",
|
|
" submit_btn.click(\n",
|
|
" user_message_updater,\n",
|
|
" inputs=[entry, chatbot],\n",
|
|
" outputs=[entry, chatbot],\n",
|
|
" queue=False\n",
|
|
" ).then(\n",
|
|
" chat_with_assistant,\n",
|
|
" inputs=[chatbot, language_dropdown, audio_dropdown],\n",
|
|
" outputs=[chatbot, image]\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Event listener to trigger on audio stop\n",
|
|
" audio_input.stop(\n",
|
|
" transcribe_and_chat,\n",
|
|
" inputs=[audio_input, chatbot, language_dropdown, audio_dropdown],\n",
|
|
" outputs=[chatbot, image],\n",
|
|
" queue=False\n",
|
|
" )\n",
|
|
"\n",
|
|
" submit_audio_btn.click(\n",
|
|
" transcribe_and_chat,\n",
|
|
" inputs=[audio_input, chatbot, language_dropdown, audio_dropdown],\n",
|
|
" outputs=[chatbot, image],\n",
|
|
" queue=False\n",
|
|
" )\n",
|
|
" \n",
|
|
" clear.click(lambda: None, inputs=None, outputs=[chatbot, image], queue=False)\n",
|
|
"\n",
|
|
"ui.launch(inbrowser=True)"
|
|
]
|
|
},
|
|
 ],
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|