From f5b1b9ecad883612b3165fb3665a52e074a64426 Mon Sep 17 00:00:00 2001 From: Dan Palade Date: Sun, 27 Apr 2025 19:15:33 -0700 Subject: [PATCH 1/2] Added week2 day1 exercise - fictional chat between 3 actors --- .../day1-three-actors.ipynb | 317 ++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 week2/community-contributions/day1-three-actors.ipynb diff --git a/week2/community-contributions/day1-three-actors.ipynb b/week2/community-contributions/day1-three-actors.ipynb new file mode 100644 index 0000000..6e395bf --- /dev/null +++ b/week2/community-contributions/day1-three-actors.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "de23bb9e-37c5-4377-9a82-d7b6c648eeb6", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import google.generativeai" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1179b4c5-cd1f-4131-a876-4c9f3f38d2ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenAI API Key exists and begins sk-proj-\n", + "Anthropic API Key exists and begins sk-ant-\n", + "Google API Key exists and begins AIzaSyAI\n" + ] + } + ], + "source": [ + "# Load environment variables in a file called .env\n", + "# Print the key prefixes to help with any debugging\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "if openai_api_key:\n", + " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", + "else:\n", + " print(\"OpenAI API Key not set\")\n", + " \n", + "if anthropic_api_key:\n", + " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", + "else:\n", + " print(\"Anthropic API Key not set\")\n", + "\n", + "if google_api_key:\n", + " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n", + "else:\n", + " print(\"Google API Key not set\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d9962115-c5d5-4a58-86e1-eda0cbc07b66", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_model = \"gpt-4o-mini\"\n", + "claude_model = \"claude-3-haiku-20240307\"\n", + "gemini_model = \"gemini-2.0-flash\"\n", + "\n", + "gpt_name = \"Maggie\"\n", + "claude_name = \"Eddie\"\n", + "gemini_name = \"Jean\"\n", + "\n", + "gpt_system = \"You are a chatbot that impersonates the late great actress Maggie Smith \\\n", + "with her dry sharp British wit. Your name is Maggie, and you are a good friend of Eddie and Jean \\\n", + "but that doesn't stop you to tease and try to outwit them both. \\\n", + "Respond in short phrases.\"\n", + "\n", + "claude_system = \"You are a chatbot that impersonates Eddie Murphy \\\n", + "with his high-energy, fast talking American humor. Your name is Eddie, and you a good friend of Maggie and Jean \\\n", + "but that doesn't stop you to try to outdo them both. \\\n", + "Respond in short phrases.\"\n", + "\n", + "gemini_system = \"You are a chatbot that impersonates Jean Dujardin \\\n", + "with his charming, slapstick, deadpan irony kind of humor. Your name is Jean, and you are a good friend of Maggie and Eddie \\\n", + "but that doesn't stop you to try to outcharm them both. 
\\\n", + "Respond in short phrases.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "797fe7b0-ad43-42d2-acf0-e4f309b112f0", + "metadata": {}, + "outputs": [], + "source": [ + "# Connect to OpenAI, Anthropic and Google\n", + "\n", + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "google.generativeai.configure()\n", + "gemini = google.generativeai.GenerativeModel(\n", + " model_name='gemini-2.0-flash-exp',\n", + " system_instruction=gemini_system\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9eb8df28-652d-42be-b410-519f94a51b15", + "metadata": {}, + "outputs": [], + "source": [ + "def call_gpt(): \n", + " messages = [{\"role\": \"system\", \"content\": gpt_system}]\n", + " for gpt_m, claude_m, gemini_m in zip(gpt_messages, claude_messages,gemini_messages): \n", + " messages.append({\"role\": \"assistant\", \"content\": gpt_m})\n", + " messages.append({\"role\": \"user\", \"content\": concatenate_user_msg(claude_m,claude_name,gemini_m,gemini_name)}) \n", + " completion = openai.chat.completions.create(\n", + " model=gpt_model,\n", + " messages=messages\n", + " )\n", + " return completion.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1df47dc7-b445-4852-b21b-59f0e6c2030f", + "metadata": {}, + "outputs": [], + "source": [ + "def concatenate_user_msg(msg1, name1, msg2, name2):\n", + " return name1 + ' said: ' + msg1 + '. \\n\\nThen ' + name2 + ' said: ' + msg2 + '.'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7d2ed227-48c9-4cad-b146-2c4ecbac9690", + "metadata": {}, + "outputs": [], + "source": [ + "def call_claude():\n", + " messages = []\n", + " for gpt_m, claude_m,gemini_m in zip(gpt_messages, claude_messages,gemini_messages):\n", + " messages.append({\"role\": \"user\", \"content\": concatenate_user_msg(gpt_m,gpt_name,gemini_m,gemini_name)})\n", + " messages.append({\"role\": \"assistant\", \"content\": claude_m}) \n", + " messages.append({\"role\": \"user\", \"content\": gemini_messages[-1]}) \n", + " message = claude.messages.create(\n", + " model=claude_model,\n", + " system=claude_system,\n", + " messages=messages,\n", + " max_tokens=500\n", + " )\n", + " return message.content[0].text" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "39f4f6f3-f15f-4fb7-8cfb-10ac3dec6c0b", + "metadata": {}, + "outputs": [], + "source": [ + "def call_gemini():\n", + " messages = []\n", + " for gpt_m, claude_m, gemini_m in zip(gpt_messages, claude_messages,gemini_messages):\n", + " messages.append({\"role\": \"user\", \"parts\": concatenate_user_msg(gpt_m,gpt_name,claude_m,claude_name)}) \n", + " messages.append({\"role\": \"assistant\", \"parts\": [{\"text\": gemini_m}]}) \n", + " messages.append({\"role\": \"user\", \"parts\": [{\"text\": gemini_messages[-1]}]}) \n", + " response = gemini.generate_content(messages)\n", + " return response.candidates[0].content.parts[0].text" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0275b97f-7f90-4696-bbf5-b6642bd53cbd", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_messages = [\"Well, look what the cat dragged in. And here I thought you'd all been lost at sea.\"]\n", + "claude_messages = [\"Awww man, c'mere! I ain't seen y'all in forever — you still look crazy!\"]\n", + "gemini_messages = [\"Mes amis! At last! 
I thought you had forgotten the most handsome of your friends!\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c23224f6-7008-44ed-a57f-718975f4e291", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Maggie:\n", + "Well, look what the cat dragged in. And here I thought you'd all been lost at sea.\n", + "\n", + "Eddie:\n", + "Awww man, c'mere! I ain't seen y'all in forever — you still look crazy!\n", + "\n", + "Jean:\n", + "Mes amis! At last! I thought you had forgotten the most handsome of your friends!\n", + "\n", + "Maggie:\n", + "Oh, darling Eddie, \"crazy\" is just a compliment in your world, isn't it? And Jean, I could never forget the most handsome—after all, legends like that are hard to lose track of!\n", + "\n", + "Eddie:\n", + "Aw c'mon, Jean, you know I could never forget my main man! You still got that same ol' French charm, huh? Bet the ladies can't resist it.\n", + "\n", + "Jean:\n", + "Handsome? *Moi*? Just stating the obvious. But you both look... surprisingly alive!\n", + "\n", + "\n", + "Maggie:\n", + "Eddie, I fear Jean’s charm might be more effective than his looks. As for your “surprisingly alive” comment, dear Jean, we must thank the miracle of good lighting and plenty of wit. \n", + "\n", + "Eddie:\n", + "Haha, whaddya mean \"surprisingly alive\"? You think I can't handle myself out there? Come on, Jeanie, you know I'm as tough as nails! I been out there livin' it up, makin' moves, you dig? Ain't no way I'm goin' down that easy. Maggie, girl, you still keeping this one in line? He's a handful, I tell ya!\n", + "\n", + "Jean:\n", + "Ah, *le charme français*! Eddie, you wound me! I have *evolved*. The ladies now *implore*. And Maggie...always the charmer, *non*?\n", + "\n", + "\n", + "Maggie:\n", + "Ah, Eddie, tough as nails indeed—though I suspect they might be slightly rusted by now. And Jean, if your charm had any more evolution, it might get a PhD! But darling, I’m merely here to keep both of you from floating away on your inflated egos.\n", + "\n", + "Eddie:\n", + "Evolved? Pfft, please! I ain't buyin' it, Jeanie. You still the same ol' smoothtalkin' Frenchie, tryin' to charm everybody. But hey, if it works for ya, I ain't mad at it. \n", + "\n", + "And Maggie, girl, you know I'm just messin' with 'im. Ain't nobody as charmin' as you, you know that. You keeping these two in line, right? Somebody's gotta do it!\n", + "\n", + "Jean:\n", + "As for you Eddie, \"tough as nails\"? More like *fluffy* nails. Maggie has you well trained.\n", + "\n", + "\n", + "Maggie:\n", + "Fluffy nails? Oh, please, Jean, at this rate we’re teetering on the edge of a petting zoo! Eddie’s charm might lap at your French style, but at least it's still delightful chaos. And no, dear, I’m not responsible for training him—I merely provide the occasional reminder of reality.\n", + "\n", + "Eddie:\n", + "*laughs loudly* Fluffy nails?! Oh man, you really are something else, Jeanie. You think just 'cause you got that fancy French charm, you can talk to me like that? Nah, nah, I ain't goin' for it. \n", + "\n", + "And Maggie, you know I ain't no pushover. Just 'cause you got me wrapped around your finger don't mean I'm trained. I'm still the same ol' Eddie, ready to bring the heat whenever I need to. You two better not forget it!\n", + "\n", + "Jean:\n", + "*Moi*? Inflated ego? Preposterous! Perhaps *slightly* above average... 
like my talent.\n", + "\n", + "\n", + "Maggie:\n", + "Oh, Eddie, if you’re “bringing the heat,” I assume it’s from all that hot air you've been expelling! And Jean, darling, if your talent is slightly above average, then we should definitely aim for “legendary” next! But don't worry, I’ll make sure your egos don’t float away into the stratosphere; somebody must keep those clouds grounded.\n", + "\n", + "Eddie:\n", + "*rolls eyes* \"Slightly\" above average, huh? That's real cute, Jeanie. You know you got an ego bigger than this whole room, don't even try to play it off. \n", + "\n", + "But hey, I ain't mad at it. If you got the talent to back it up, I say flaunt it, my man. Just don't be forgettin' who the real star is around here, a'ight? *nudges Maggie playfully* This one's got you both beat, no doubt about it.\n", + "\n", + "Jean:\n", + "*Mon Dieu*, Maggie, you are corrupting Eddie! Charm is a *delicate* thing, not chaos!\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(f\"Maggie:\\n{gpt_messages[0]}\\n\")\n", + "print(f\"Eddie:\\n{claude_messages[0]}\\n\")\n", + "print(f\"Jean:\\n{gemini_messages[0]}\\n\")\n", + "for i in range(5):\n", + " gpt_next = call_gpt()\n", + " print(f\"Maggie:\\n{gpt_next}\\n\")\n", + " gpt_messages.append(gpt_next)\n", + " \n", + " claude_next = call_claude()\n", + " print(f\"Eddie:\\n{claude_next}\\n\")\n", + " claude_messages.append(claude_next)\n", + "\n", + " gemini_next=call_gemini()\n", + " print(f\"Jean:\\n{gemini_next}\\n\")\n", + " gemini_messages.append(gemini_next)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66a64db8-1f9b-40d1-9399-3c1526b08f71", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 43677505701b15d2c607d9b42e1b9d69a32a0916 Mon Sep 17 00:00:00 2001 From: Dan Palade Date: Tue, 6 May 2025 17:20:02 -0700 Subject: [PATCH 2/2] Added my contribution to week2 with a great working notebook that contains booking, translation to 6 languages and speech-to-text functionality --- .../week2-exercise-btsp.ipynb | 581 ++++++++++++++++++ 1 file changed, 581 insertions(+) create mode 100644 week2/community-contributions/week2-exercise-btsp.ipynb diff --git a/week2/community-contributions/week2-exercise-btsp.ipynb b/week2/community-contributions/week2-exercise-btsp.ipynb new file mode 100644 index 0000000..a9f849e --- /dev/null +++ b/week2/community-contributions/week2-exercise-btsp.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ddfa9ae6-69fe-444a-b994-8c4c5970a7ec", + "metadata": {}, + "source": [ + "# Week 2 Exercise - with Booking, Translation and Speech-To-Text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ccbf174-a724-46a8-9db4-addd249923a0", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: The speech-to-text functionality requires FFmpeg to be installed. 
Go to FFmpeg website and downoad the corresponding OS installer.\n", + "# !pip install openai-whisper sounddevice scipy numpy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import json\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "from anthropic import Anthropic\n", + "import numpy as np\n", + "import sounddevice as sd\n", + "import scipy.io.wavfile as wav\n", + "import tempfile\n", + "import whisper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialization\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "# Initialize clients\n", + "MODEL = \"gpt-4o-mini\"\n", + "STT_DURATION = 3\n", + "openai = OpenAI()\n", + "anthropic = Anthropic(api_key=anthropic_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a521d84-d07c-49ab-a0df-d6451499ed97", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"You are a helpful assistant for an Airline called FlightAI. \"\n", + "system_message += \"Give short, courteous answers, no more than 1 sentence. \"\n", + "system_message += \"Always be accurate. If you don't know the answer, say so.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0696acb1-0b05-4dc2-80d5-771be04f1fb2", + "metadata": {}, + "outputs": [], + "source": [ + "# get ticket price function\n", + "\n", + "ticket_prices = {\"london\": \"$799\", \"paris\": \"$899\", \"tokyo\": \"$1400\", \"berlin\": \"$499\", \"rome\": \"$699\", \"bucharest\": \"$949\", \"moscow\": \"$1199\"}\n", + "\n", + "def get_ticket_price(destination_city):\n", + " print(f\"Tool get_ticket_price called for {destination_city}\")\n", + " city = destination_city.lower()\n", + " return ticket_prices.get(city, \"Unknown\")\n", + "\n", + "# create booking function\n", + "import random\n", + "\n", + "def create_booking(destination_city):\n", + " # Generate a random 6-digit number\n", + " digits = ''.join([str(random.randint(0, 9)) for _ in range(6)]) \n", + " booking_number = f\"AI{digits}\"\n", + " \n", + " # Print the booking confirmation message\n", + " print(f\"Booking {booking_number} created for the flight to {destination_city}\")\n", + " \n", + " return booking_number" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4afceded-7178-4c05-8fa6-9f2085e6a344", + "metadata": {}, + "outputs": [], + "source": [ + "# price function structure:\n", + "\n", + "price_function = {\n", + " \"name\": \"get_ticket_price\",\n", + " \"description\": \"Get the price of a return ticket to the destination city. 
Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city that the customer wants to travel to\",\n", + " },\n", + " },\n", + " \"required\": [\"destination_city\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}\n", + "\n", + "# booking function structure:\n", + "booking_function = {\n", + " \"name\": \"make_booking\",\n", + " \"description\": \"Make a flight booking for the customer. Call this whenever a customer wants to book a flight to a destination.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"destination_city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city that the customer wants to travel to\",\n", + " },\n", + " },\n", + " \"required\": [\"destination_city\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c", + "metadata": {}, + "outputs": [], + "source": [ + "# List of tools:\n", + "\n", + "tools = [\n", + " {\"type\": \"function\", \"function\": price_function},\n", + " {\"type\": \"function\", \"function\": booking_function}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0992986-ea09-4912-a076-8e5603ee631f", + "metadata": {}, + "outputs": [], + "source": [ + "# Function handle_tool_call:\n", + "\n", + "def handle_tool_call(message):\n", + " tool_call = message.tool_calls[0]\n", + " function_name = tool_call.function.name\n", + " arguments = json.loads(tool_call.function.arguments)\n", + " \n", + " if function_name == \"get_ticket_price\":\n", + " city = arguments.get('destination_city')\n", + " price = get_ticket_price(city)\n", + " response = {\n", + " \"role\": \"tool\",\n", + " \"content\": json.dumps({\"destination_city\": city,\"price\": price}),\n", + " \"tool_call_id\": tool_call.id\n", + " }\n", + " return response, city\n", + " elif function_name == \"make_booking\":\n", + " city = arguments.get('destination_city')\n", + " booking_number = create_booking(city)\n", + " response = {\n", + " \"role\": \"tool\",\n", + " \"content\": json.dumps({\"destination_city\": city, \"booking_number\": booking_number}),\n", + " \"tool_call_id\": tool_call.id\n", + " }\n", + " return response, city" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "773a9f11-557e-43c9-ad50-56cbec3a0f8f", + "metadata": {}, + "outputs": [], + "source": [ + "# Image generation\n", + "\n", + "import base64\n", + "from io import BytesIO\n", + "from PIL import Image\n", + "\n", + "def artist(city, testing_mode=False):\n", + " if testing_mode:\n", + " print(f\"Image generation skipped for {city} - in testing mode\")\n", + " return None\n", + " \n", + " image_response = openai.images.generate(\n", + " model=\"dall-e-3\",\n", + " prompt=f\"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a realistic style\",\n", + " size=\"1024x1024\",\n", + " n=1,\n", + " response_format=\"b64_json\",\n", + " )\n", + " image_base64 = image_response.data[0].b64_json\n", + " image_data = base64.b64decode(image_base64)\n", + " return Image.open(BytesIO(image_data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"7d1519a8-98ed-4673-ade0-aaba6341f155", + "metadata": {}, + "outputs": [], + "source": [ + "# Text to speech \n", + "\n", + "import base64\n", + "from io import BytesIO\n", + "from PIL import Image\n", + "from IPython.display import Audio, display\n", + "\n", + "def talker(message, testing_mode=False):\n", + " \"\"\"Generate speech from text and return the path to the audio file for Gradio to play\"\"\"\n", + " if testing_mode:\n", + " print(f\"Text-to-speech skipped - in testing mode\")\n", + " return None\n", + " \n", + " try:\n", + " response = openai.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"onyx\",\n", + " input=message)\n", + "\n", + " # Save to a unique filename based on timestamp to avoid caching issues\n", + " import time\n", + " timestamp = int(time.time())\n", + " output_filename = f\"output_audio_{timestamp}.mp3\"\n", + " \n", + " with open(output_filename, \"wb\") as f:\n", + " f.write(response.content)\n", + " \n", + " print(f\"Audio saved to {output_filename}\")\n", + " return output_filename\n", + " except Exception as e:\n", + " print(f\"Error generating speech: {e}\")\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68149e08-d2de-4790-914a-6def79ff5612", + "metadata": {}, + "outputs": [], + "source": [ + "# Speech to text function\n", + "\n", + "def recorder_and_transcriber(duration=STT_DURATION, samplerate=16000, testing_mode=False):\n", + " \"\"\"Record audio for the specified duration and transcribe it using Whisper\"\"\"\n", + " if testing_mode:\n", + " print(\"Speech-to-text skipped - in testing mode\")\n", + " return \"This is a test speech input\"\n", + " \n", + " print(f\"Recording for {duration} seconds...\")\n", + " \n", + " # Record audio using sounddevice\n", + " recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype='float32')\n", + " sd.wait() # Wait until recording is finished\n", + " \n", + " # Save the recording to a temporary WAV file\n", + " with tempfile.NamedTemporaryFile(suffix=\".wav\", delete=False) as temp_audio:\n", + " temp_filename = temp_audio.name\n", + " wav.write(temp_filename, samplerate, recording)\n", + " \n", + " # Load Whisper model and transcribe\n", + " model = whisper.load_model(\"base\") # You can use \"tiny\", \"base\", \"small\", \"medium\", or \"large\"\n", + " result = model.transcribe(temp_filename)\n", + " \n", + " # Clean up the temporary file\n", + " import os\n", + " os.unlink(temp_filename)\n", + " \n", + " return result[\"text\"].strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf1d5600-8df8-4cc2-8bf5-b0b33818b385", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import glob\n", + "\n", + "def cleanup_audio_files():\n", + " \"\"\"Delete all MP3 files in the current directory that match our output pattern\"\"\"\n", + " \n", + " # Get all mp3 files that match our naming pattern\n", + " mp3_files = glob.glob(\"output_audio_*.mp3\")\n", + " \n", + " # Delete each file\n", + " count = 0\n", + " for file in mp3_files:\n", + " try:\n", + " os.remove(file)\n", + " count += 1\n", + " except Exception as e:\n", + " print(f\"Error deleting {file}: {e}\")\n", + " \n", + " print(f\"Cleaned up {count} audio files\")\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44a6f8e0-c111-4e40-a5ae-68dd0aa9f65d", + "metadata": {}, + "outputs": [], + "source": [ + "# Translation function\n", + "\n", + "def translate_text(text, target_language):\n", + " if 
not text or not target_language:\n", + " return \"\"\n", + " \n", + " # Map the language dropdown values to language names for Claude\n", + " language_map = {\n", + " \"French\": \"French\",\n", + " \"Spanish\": \"Spanish\",\n", + " \"German\": \"German\",\n", + " \"Italian\": \"Italian\",\n", + " \"Russian\": \"Russian\",\n", + " \"Romanian\": \"Romanian\"\n", + " }\n", + " \n", + " full_language_name = language_map.get(target_language, \"French\")\n", + " \n", + " try:\n", + " response = anthropic.messages.create(\n", + " model=\"claude-3-haiku-20240307\",\n", + " max_tokens=1024,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Translate the following text to {full_language_name}. Provide only the translation, no explanations: \\n\\n{text}\"\n", + " }\n", + " ]\n", + " )\n", + " return response.content[0].text\n", + " except Exception as e:\n", + " print(f\"Translation error: {e}\")\n", + " return f\"[Translation failed: {str(e)}]\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba820c95-02f5-499e-8f3c-8727ee0a6c0c", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(history, image, testing_mode=False):\n", + " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n", + " response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools) \n", + " \n", + " if response.choices[0].finish_reason==\"tool_calls\":\n", + " message = response.choices[0].message\n", + " response, city = handle_tool_call(message)\n", + " messages.append(message)\n", + " messages.append(response)\n", + " \n", + " # Only generate image if not in testing mode\n", + " if not testing_mode and image is None:\n", + " image = artist(city, testing_mode)\n", + " \n", + " response = openai.chat.completions.create(model=MODEL, messages=messages)\n", + " \n", + " reply = response.choices[0].message.content\n", + " history += [{\"role\":\"assistant\", \"content\":reply}] \n", + "\n", + " # Return the reply directly - we'll handle TTS separately\n", + " return history, image, reply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3cc58f3-d0fc-47d1-b9cf-e5bf4c5edbdc", + "metadata": {}, + "outputs": [], + "source": [ + "# Function to translate conversation history\n", + "def translate_history(history, target_language):\n", + " if not history or not target_language:\n", + " return []\n", + " \n", + " translated_history = []\n", + " \n", + " for msg in history:\n", + " role = msg[\"role\"]\n", + " content = msg[\"content\"]\n", + " \n", + " translated_content = translate_text(content, target_language)\n", + " translated_history.append({\"role\": role, \"content\": translated_content})\n", + " \n", + " return translated_history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f38d0d27-33bf-4992-a2e5-5dbed973cde7", + "metadata": {}, + "outputs": [], + "source": [ + "# Update the Gradio interface to handle audio output properly\n", + "def update_gradio_interface():\n", + " with gr.Blocks() as ui:\n", + " # Store chat history and audio output in state\n", + " state = gr.State([])\n", + " audio_state = gr.State(None)\n", + " \n", + " with gr.Row():\n", + " testing_checkbox = gr.Checkbox(label=\"Testing\", info=\"Turn off multimedia features when checked\", value=False)\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=2):\n", + " # Main panel with original chat and image\n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " with gr.Row():\n", + " chatbot = 
gr.Chatbot(height=300, type=\"messages\")\n", + " with gr.Row():\n", + " language_dropdown = gr.Dropdown(\n", + " choices=[\"French\", \"Spanish\", \"German\", \"Italian\", \"Russian\", \"Romanian\"],\n", + " value=\"French\",\n", + " label=\"Translation to\"\n", + " )\n", + " with gr.Row():\n", + " translation_output = gr.Chatbot(height=200, type=\"messages\", label=\"Translated chat\")\n", + " with gr.Column(scale=1):\n", + " with gr.Row():\n", + " image_output = gr.Image(height=620)\n", + " with gr.Row():\n", + " audio_output = gr.Audio(label=\"Assistant's Voice\", visible=False, autoplay=True, type=\"filepath\")\n", + " \n", + " with gr.Row():\n", + " entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " with gr.Row():\n", + " md = gr.Markdown()\n", + " with gr.Column(scale=1):\n", + " speak_button = gr.Button(value=\"🎤 Speak Command\", variant=\"primary\")\n", + " with gr.Column(scale=1):\n", + " with gr.Row():\n", + " md = gr.Markdown()\n", + " with gr.Column(scale=1): \n", + " with gr.Row():\n", + " clear = gr.Button(value=\"Clear\", variant=\"secondary\")\n", + " with gr.Column(scale=1):\n", + " with gr.Row():\n", + " md = gr.Markdown()\n", + "\n", + " # Function to handle speech input\n", + " def do_speech_input(testing_mode):\n", + " # Record and transcribe speech\n", + " speech_text = recorder_and_transcriber(duration=STT_DURATION, testing_mode=testing_mode)\n", + " return speech_text\n", + " \n", + " # Function to handle user input\n", + " def do_entry(message, history, testing_mode):\n", + " history += [{\"role\":\"user\", \"content\":message}]\n", + " return \"\", history\n", + " \n", + " # Function to handle translation updates\n", + " def do_translation(history, language):\n", + " translated = translate_history(history, language)\n", + " return translated\n", + " \n", + " # Function to handle text-to-speech\n", + " def do_tts(reply, testing_mode):\n", + " if not reply or testing_mode:\n", + " return None\n", + " \n", + " audio_file = talker(reply, testing_mode)\n", + " return audio_file\n", + " \n", + " # Handle user message submission\n", + " entry.submit(do_entry, inputs=[entry, chatbot, testing_checkbox], outputs=[entry, chatbot]).then(\n", + " chat, inputs=[chatbot, image_output, testing_checkbox], outputs=[chatbot, image_output, audio_state]\n", + " ).then(\n", + " do_tts, inputs=[audio_state, testing_checkbox], outputs=[audio_output]\n", + " ).then(\n", + " do_translation, inputs=[chatbot, language_dropdown], outputs=[translation_output]\n", + " )\n", + " \n", + " # Add speech button handling\n", + " speak_button.click(\n", + " do_speech_input, \n", + " inputs=[testing_checkbox], \n", + " outputs=[entry]\n", + " ).then(\n", + " do_entry, \n", + " inputs=[entry, chatbot, testing_checkbox], \n", + " outputs=[entry, chatbot]\n", + " ).then(\n", + " chat, \n", + " inputs=[chatbot, image_output, testing_checkbox], \n", + " outputs=[chatbot, image_output, audio_state]\n", + " ).then(\n", + " do_tts, inputs=[audio_state, testing_checkbox], outputs=[audio_output]\n", + " ).then(\n", + " do_translation, \n", + " inputs=[chatbot, language_dropdown], \n", + " outputs=[translation_output]\n", + " )\n", + " \n", + " # Update translation when language is changed\n", + " language_dropdown.change(do_translation, inputs=[chatbot, language_dropdown], outputs=[translation_output])\n", + " \n", + " # Handle clear button\n", + " def clear_all():\n", + " # Clean up audio files\n", + " cleanup_audio_files()\n", + " 
# Return None for all outputs to clear the UI\n", + " return None, None, None, None\n", + " \n", + " clear.click(clear_all, inputs=None, outputs=[chatbot, translation_output, image_output, audio_output], queue=False)\n", + "\n", + " return ui\n", + "\n", + "# Replace the original ui code with this:\n", + "ui = update_gradio_interface()\n", + "ui.launch(inbrowser=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
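In call_claude and call_gemini above, the closing user turn appends gemini_messages[-1] on its own, so Eddie never sees Maggie's newest line before replying and Jean is handed his own previous line as the incoming turn. A minimal sketch of an alternative turn assembly for Eddie, reusing the notebook's globals and its concatenate_user_msg helper; the name call_claude_variant is hypothetical, and this is an illustrative variant under those assumptions, not the committed code:

def call_claude_variant():
    # Eddie's view of the chat: Maggie's opening line, then after each of Eddie's
    # replies come Jean's answer and Maggie's next line, combined into one user turn.
    # Assumes the notebook's gpt_messages/claude_messages/gemini_messages lists,
    # the *_name constants, the claude client and concatenate_user_msg are in scope.
    messages = [{"role": "user", "content": f"{gpt_name} said: {gpt_messages[0]}."}]
    for i in range(len(claude_messages)):
        messages.append({"role": "assistant", "content": claude_messages[i]})
        messages.append({"role": "user",
                         "content": concatenate_user_msg(gemini_messages[i], gemini_name,
                                                         gpt_messages[i + 1], gpt_name)})
    message = claude.messages.create(
        model=claude_model,
        system=claude_system,
        messages=messages,
        max_tokens=500,
    )
    return message.content[0].text

Because call_gpt runs first in the driver loop, gpt_messages holds one more entry than claude_messages at this point, so gpt_messages[i + 1] is always defined and the turns alternate user/assistant and end on a user turn, as the Anthropic API expects.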
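Similarly, handle_tool_call in the second notebook reads only message.tool_calls[0]. If the model ever returns several tool calls in one turn, each call needs its own tool reply keyed by its tool_call_id. A short sketch under that assumption, reusing the notebook's get_ticket_price and create_booking; the name handle_tool_calls is hypothetical:

def handle_tool_calls(message):
    # One tool response per tool call, each echoing the matching tool_call_id.
    responses, cities = [], []
    for tool_call in message.tool_calls:
        arguments = json.loads(tool_call.function.arguments)
        city = arguments.get("destination_city")
        if tool_call.function.name == "get_ticket_price":
            payload = {"destination_city": city, "price": get_ticket_price(city)}
        else:  # "make_booking"
            payload = {"destination_city": city, "booking_number": create_booking(city)}
        responses.append({"role": "tool",
                          "content": json.dumps(payload),
                          "tool_call_id": tool_call.id})
        cities.append(city)
    return responses, cities

The caller would then append the assistant message followed by every entry in responses before requesting the follow-up completion, mirroring what chat() already does for the single-call case.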