From 33a12c846b300ac59cf09542bec861416d8eee60 Mon Sep 17 00:00:00 2001 From: Rohit Nain Date: Sat, 4 Oct 2025 23:09:34 +0530 Subject: [PATCH] created working voice_enabled_multi_model_AI_assistance --- ...ce_enabled_multi_model_AI_assistanve.ipynb | 646 ++++++++++++++++++ 1 file changed, 646 insertions(+) create mode 100644 week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb diff --git a/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb b/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb new file mode 100644 index 0000000..0345dfc --- /dev/null +++ b/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "7bc4a9cd", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import gradio as gr\n", + "from openai import OpenAI\n", + "import anthropic\n", + "import google.generativeai as genai\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "# Initialize clients\n", + "openai_client = OpenAI(api_key=OPENAI_API_KEY)\n", + "genai.configure(api_key=google_api_key)\n", + "claude_client = anthropic.Anthropic(api_key=anthropic_api_key)\n", + "\n", + "# System prompt - Universal and comprehensive\n", + "SYSTEM_PROMPT = \"\"\"You are a highly capable and versatile AI assistant designed to help with any type of question or task.\n", + "\n", + "Your capabilities span across all domains including but not limited to:\n", + "- Programming, software development, and technology\n", + "- Science, mathematics, and engineering\n", + "- Arts, literature, and creative writing\n", + "- History, philosophy, and social sciences\n", + "- Business, finance, and economics\n", + "- Health, wellness, and lifestyle advice\n", + "- Education and learning support\n", + "- Problem-solving and critical thinking\n", + "- General knowledge and trivia\n", + "- Casual conversation and entertainment\n", + "\n", + "Guidelines:\n", + "- Provide accurate, helpful, and comprehensive responses\n", + "- Adapt your tone and style to match the context of the question\n", + "- Use examples and explanations when helpful\n", + "- Be creative when asked for creative content\n", + "- Be precise and factual when asked for information\n", + "- Ask clarifying questions if the request is ambiguous\n", + "- Admit when you're uncertain and provide the best possible guidance\n", + "- Be conversational, friendly, and supportive\n", + "\n", + "You can help with anything from technical coding problems to creative storytelling, from academic research to casual chat. 
There are no topic restrictions - feel free to engage with any subject matter the user brings up.\"\"\"\n", + "\n", + "# Model configurations\n", + "model_configs = {\n", + " \"GPT-4o\": {\"provider\": \"openai\", \"model\": \"gpt-4o\"},\n", + " \"GPT-4o-mini\": {\"provider\": \"openai\", \"model\": \"gpt-4o-mini\"},\n", + " \"GPT-3.5-turbo\": {\"provider\": \"openai\", \"model\": \"gpt-3.5-turbo\"},\n", + " \"Claude Sonnet 4\": {\"provider\": \"anthropic\", \"model\": \"claude-sonnet-4-20250514\"},\n", + " \"Gemini 2.0 Flash\": {\"provider\": \"google\", \"model\": \"gemini-2.0-flash-exp\"},\n", + "}\n", + "\n", + "def chat_streaming(message, history, model_name, temperature):\n", + " \"\"\"Main chat function with streaming support\"\"\"\n", + " \n", + " config = model_configs[model_name]\n", + " provider = config[\"provider\"]\n", + " model = config[\"model\"]\n", + " \n", + " # Convert messages format history to API format\n", + " messages = []\n", + " for msg in history:\n", + " if msg[\"role\"] == \"user\":\n", + " messages.append({\"role\": \"user\", \"content\": msg[\"content\"]})\n", + " elif msg[\"role\"] == \"assistant\":\n", + " messages.append({\"role\": \"assistant\", \"content\": msg[\"content\"]})\n", + " messages.append({\"role\": \"user\", \"content\": message})\n", + " \n", + " # Stream based on provider\n", + " if provider == \"openai\":\n", + " stream = openai_client.chat.completions.create(\n", + " model=model,\n", + " messages=[{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + messages,\n", + " temperature=temperature,\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " for chunk in stream:\n", + " if chunk.choices[0].delta.content:\n", + " response += chunk.choices[0].delta.content\n", + " yield response\n", + " \n", + " elif provider == \"anthropic\":\n", + " response = \"\"\n", + " with claude_client.messages.stream(\n", + " model=model,\n", + " max_tokens=2000,\n", + " temperature=temperature,\n", + " system=SYSTEM_PROMPT,\n", + " messages=messages,\n", + " ) as stream:\n", + " for text in stream.text_stream:\n", + " response += text\n", + " yield response\n", + " \n", + " elif provider == \"google\":\n", + " gemini = genai.GenerativeModel(\n", + " model_name=model,\n", + " system_instruction=SYSTEM_PROMPT,\n", + " )\n", + " \n", + " # Convert history for Gemini\n", + " gemini_history = []\n", + " for msg in history:\n", + " if msg[\"role\"] == \"user\":\n", + " gemini_history.append({\"role\": \"user\", \"parts\": [msg[\"content\"]]})\n", + " elif msg[\"role\"] == \"assistant\":\n", + " gemini_history.append({\"role\": \"model\", \"parts\": [msg[\"content\"]]})\n", + " \n", + " chat = gemini.start_chat(history=gemini_history)\n", + " \n", + " stream = chat.send_message(\n", + " message,\n", + " stream=True,\n", + " generation_config=genai.types.GenerationConfig(temperature=temperature)\n", + " )\n", + " \n", + " response = \"\"\n", + " for chunk in stream:\n", + " response += chunk.text\n", + " yield response\n", + "\n", + "def handle_audio_input(audio):\n", + " \"\"\"Transcribe audio input using Whisper\"\"\"\n", + " if audio is None:\n", + " return \"\"\n", + " \n", + " try:\n", + " audio_file = open(audio, \"rb\")\n", + " transcript = openai_client.audio.transcriptions.create(\n", + " model=\"whisper-1\",\n", + " file=audio_file\n", + " )\n", + " return transcript.text\n", + " except Exception as e:\n", + " return f\"Error transcribing audio: {str(e)}\"\n", + "\n", + "def text_to_speech(text):\n", + " \"\"\"Convert text response to 
speech\"\"\"\n", + " try:\n", + " response = openai_client.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"nova\",\n", + " input=text[:4096] # Limit to prevent errors\n", + " )\n", + " \n", + " audio_path = \"response.mp3\"\n", + " response.stream_to_file(audio_path)\n", + " return audio_path\n", + " except Exception as e:\n", + " print(f\"TTS Error: {str(e)}\")\n", + " return None\n", + "\n", + "# Custom CSS for modern, attractive UI\n", + "custom_css = \"\"\"\n", + "@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');\n", + "\n", + ".gradio-container {\n", + " font-family: 'Inter', sans-serif !important;\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;\n", + "}\n", + "\n", + ":root .dark {\n", + " --background-fill-primary: #f0f0f0;\n", + " --body-background-fill: var(--background-fill-primary);\n", + " --block-background-fill: white !important;\n", + " --block-title-background-fill: #dfe7ff;\n", + " --block-title-text-color:#6366f1;\n", + " --body-text-color: black;\n", + " --button-secondary-text-color:black;\n", + " --input-background-fill:white;\n", + "\n", + " --block-label-background-fill:#dfe7ff;\n", + " --block-label-text-color:#6366f1;\n", + "\n", + " --block-border-color:#eaeaea;\n", + " --input-border-color: #eaeaea;\n", + " --border-color-primary:#eaeaea;\n", + "\n", + " --color-accent-soft: #dfe7ff;\n", + " --border-color-accent-subdued: #98a6cf;\n", + "\n", + " --checkbox-background-color: #eaeaea;\n", + " --checkbox-border-color: #eaeaea;\n", + " --background-fill-secondary:#eaeaea;\n", + "}\n", + "\n", + ".main {\n", + " background: white;\n", + " border-radius: 20px;\n", + " box-shadow: 0 20px 60px rgba(0,0,0,0.3);\n", + " padding: 0 !important;\n", + " overflow: hidden;\n", + "}\n", + "\n", + ".contain {\n", + " padding: 2rem !important;\n", + "}\n", + "\n", + "/* Header Styling */\n", + ".header-section {\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);\n", + " padding: 2.5rem 2rem;\n", + " border-radius: 20px 20px 0 0;\n", + " margin: -2rem 0rem 2rem 0rem;\n", + " color: white;\n", + " box-shadow: 0 4px 20px rgba(102, 126, 234, 0.4);\n", + "}\n", + "\n", + ".header-section h1 {\n", + " font-size: 2.5rem !important;\n", + " font-weight: 700 !important;\n", + " margin: 0 0 0.5rem 0 !important;\n", + " color: white !important;\n", + " text-shadow: 2px 2px 4px rgba(0,0,0,0.2);\n", + "}\n", + "\n", + ".header-section p {\n", + " font-size: 1.1rem !important;\n", + " margin: 0.5rem 0 !important;\n", + " color: rgba(255,255,255,0.95) !important;\n", + " font-weight: 400;\n", + "}\n", + "\n", + ".feature-badge {\n", + " display: inline-block;\n", + " background: rgba(255,255,255,0.2);\n", + " padding: 0.4rem 1rem;\n", + " border-radius: 20px;\n", + " margin: 0.3rem;\n", + " font-size: 0.9rem;\n", + " backdrop-filter: blur(10px);\n", + " border: 1px solid rgba(255,255,255,0.3);\n", + "}\n", + "\n", + "/* Sidebar Styling */\n", + ".control-panel {\n", + " background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);\n", + " padding: 1.5rem;\n", + " border-radius: 15px;\n", + " box-shadow: 0 4px 15px rgba(0,0,0,0.1);\n", + " height: 100%;\n", + "}\n", + "\n", + ".control-panel label {\n", + " font-weight: 600 !important;\n", + " font-size: 0.95rem !important;\n", + "}\n", + "\n", + "/* Dropdown Styling */\n", + ".dropdown-container select {\n", + " background: white !important;\n", + " border: 2px solid #e2e8f0 !important;\n", + " border-radius: 10px !important;\n", 
+ " padding: 0.75rem !important;\n", + " font-weight: 500 !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".dropdown-container select:hover {\n", + " border-color: #667eea !important;\n", + " box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;\n", + "}\n", + "\n", + "/* Slider Styling */\n", + "input[type=\"range\"] {\n", + " accent-color: #667eea !important;\n", + "}\n", + "\n", + "/* Button Styling */\n", + ".primary-btn {\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;\n", + " border: none !important;\n", + " color: white !important;\n", + " font-weight: 600 !important;\n", + " padding: 0.75rem 2rem !important;\n", + " border-radius: 10px !important;\n", + " box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".primary-btn:hover {\n", + " transform: translateY(-2px) !important;\n", + " box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;\n", + "}\n", + "\n", + ".secondary-btn {\n", + " background: #e2e8f0 !important;\n", + " border: none !important;\n", + " color: #2d3748 !important;\n", + " font-weight: 600 !important;\n", + " padding: 0.75rem 1.5rem !important;\n", + " border-radius: 10px !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".secondary-btn:hover {\n", + " background: #cbd5e0 !important;\n", + " transform: translateY(-2px) !important;\n", + "}\n", + "\n", + "/* Chatbot Styling */\n", + ".chatbot-container {\n", + " background: white;\n", + " border-radius: 15px;\n", + " border: 2px solid #e2e8f0;\n", + " box-shadow: 0 4px 20px rgba(0,0,0,0.08);\n", + " overflow: hidden;\n", + "}\n", + "\n", + "/* Input Box Styling */\n", + ".message-input textarea {\n", + " border: 2px solid #e2e8f0 !important;\n", + " border-radius: 12px !important;\n", + " padding: 1rem !important;\n", + " font-size: 1rem !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".message-input textarea:focus {\n", + " border-color: #667eea !important;\n", + " box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;\n", + "}\n", + "\n", + "/* Input Row Centering */\n", + ".input-row {\n", + " display: flex !important;\n", + " justify-content: center !important;\n", + " align-items: center !important;\n", + " gap: 1rem !important;\n", + "}\n", + "\n", + ".input-row > * {\n", + " flex-shrink: 0 !important;\n", + "}\n", + "\n", + "/* Audio Components */\n", + ".audio-component {\n", + " background: #f7fafc;\n", + " border: 2px dashed #cbd5e0;\n", + " border-radius: 12px;\n", + " padding: 1rem;\n", + " transition: all 0.3s ease;\n", + "}\n", + "\n", + ".audio-component:hover {\n", + " border-color: #667eea;\n", + " background: #edf2f7;\n", + "}\n", + "\n", + "/* Checkbox Styling */\n", + ".checkbox-label {\n", + " display: flex;\n", + " align-items: center;\n", + " gap: 0.5rem;\n", + " font-weight: 500;\n", + " color: #2d3748;\n", + "}\n", + "\n", + "/* Tips Section */\n", + ".tips-section {\n", + " background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);\n", + " padding: 1.5rem;\n", + " border-radius: 15px;\n", + " margin-top: 2rem;\n", + " border-left: 4px solid #667eea;\n", + "}\n", + "\n", + ".tips-section h3 {\n", + " color: #667eea !important;\n", + " font-weight: 600 !important;\n", + " margin-bottom: 1rem !important;\n", + "}\n", + "\n", + ".tips-section ul {\n", + " list-style: none;\n", + " padding: 0;\n", + "}\n", + "\n", + ".tips-section li {\n", + " padding: 0.5rem 0;\n", + " 
color: #4a5568 !important;\n", + " font-size: 0.95rem;\n", + "}\n", + "\n", + ".tips-section li:before {\n", + " content: \"→ \";\n", + " color: #667eea;\n", + " font-weight: bold;\n", + " margin-right: 0.5rem;\n", + "}\n", + "\n", + "/* Force black color for strong/bold text */\n", + ".tips-section strong {\n", + " color: #1a202c !important;\n", + "}\n", + "\n", + ".prose * {\n", + " color: inherit !important;\n", + "}\n", + "\n", + ".prose strong {\n", + " color: #1a202c !important;\n", + " font-weight: 600 !important;\n", + "}\n", + "\n", + "/* Responsive Design */\n", + "@media (max-width: 768px) {\n", + " .header-section h1 {\n", + " font-size: 1.8rem !important;\n", + " }\n", + " \n", + " .contain {\n", + " padding: 1rem !important;\n", + " }\n", + "}\n", + "\n", + ".fillable{\n", + " max-width:95% !important;\n", + "}\n", + "#component-5{\n", + " flex-grow:1.1 !important;\n", + "}\n", + ".bubble-wrap.svelte-gjtrl6 {\n", + " background:none !important;\n", + "}\n", + ".bot.svelte-1csv61q.message {\n", + " background-color: white !important;\n", + " border: 1px solid #f3f3f3;\n", + "}\n", + ".options.svelte-y6qw75> li:hover{\n", + " background:white ;\n", + "}\n", + ".options.svelte-y6qw75> .selected{\n", + " background:white ;\n", + "}\n", + "\n", + "\"\"\"\n", + "\n", + "# Build Gradio Interface\n", + "with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:\n", + " \n", + " # Header\n", + " with gr.Row(elem_classes=\"header-section\"):\n", + " with gr.Column():\n", + " gr.HTML(\"\"\"\n", + "

<h1>🚀 Voice-Enabled Multi-Model AI Assistant</h1>\n", + "            <p>Your intelligent companion for any question - from coding to creativity, science to storytelling!</p>\n", + "            <div>\n", + "                <span class=\"feature-badge\">🤖 5 AI Models</span>\n", + "                <span class=\"feature-badge\">🎤 Voice Input</span>\n", + "                <span class=\"feature-badge\">🔊 Audio Output</span>\n", + "                <span class=\"feature-badge\">⚡ Real-time Streaming</span>\n", + "                <span class=\"feature-badge\">🌐 Any Topic</span>\n", + "            </div>
\n", + " \"\"\")\n", + " \n", + " with gr.Row():\n", + " # Left Sidebar - Controls\n", + " with gr.Column(scale=1, elem_classes=\"control-panel\"):\n", + " gr.HTML(\"

<h3>⚙️ Settings</h3>
\")\n", + " \n", + " model_choice = gr.Dropdown(\n", + " choices=list(model_configs.keys()),\n", + " value=\"GPT-4o-mini\",\n", + " label=\"🤖 AI Model\",\n", + " info=\"Select your preferred model\",\n", + " elem_classes=\"dropdown-container\"\n", + " )\n", + " \n", + " temperature = gr.Slider(\n", + " minimum=0,\n", + " maximum=1,\n", + " value=0.7,\n", + " step=0.1,\n", + " label=\"🌡️ Temperature\",\n", + " info=\"Higher = more creative responses\"\n", + " )\n", + " \n", + " gr.HTML(\"

<h3>🎙️ Audio Features</h3>
\")\n", + " \n", + " audio_input = gr.Audio(\n", + " sources=[\"microphone\"],\n", + " type=\"filepath\",\n", + " label=\"🎤 Voice Input\",\n", + " elem_classes=\"audio-component\"\n", + " )\n", + " \n", + " audio_output_enabled = gr.Checkbox(\n", + " label=\"🔊 Enable Audio Response\",\n", + " value=False,\n", + " elem_classes=\"checkbox-label\"\n", + " )\n", + " \n", + " # Right Side - Chat Interface\n", + " with gr.Column(scale=3):\n", + " chatbot = gr.Chatbot(\n", + " label=\"💬 Conversation\",\n", + " height=550,\n", + " show_copy_button=True,\n", + " type='messages',\n", + " elem_classes=\"chatbot-container\",\n", + " avatar_images=(None, \"https://api.dicebear.com/7.x/bottts/svg?seed=ai\")\n", + " )\n", + " \n", + " with gr.Row(elem_classes=\"input-row\"):\n", + " msg = gr.Textbox(\n", + " label=\"\",\n", + " placeholder=\"💭 Ask me anything - tech help, creative writing, life advice, science, history, or just chat!\",\n", + " scale=5,\n", + " elem_classes=\"message-input\",\n", + " show_label=False\n", + " )\n", + " submit_btn = gr.Button(\"Send 📤\", scale=1, elem_classes=\"primary-btn\")\n", + " \n", + " audio_response = gr.Audio(\n", + " label=\"🔊 Audio Response\", \n", + " visible=False,\n", + " elem_classes=\"audio-component\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " clear_btn = gr.Button(\"🗑️ Clear Chat\", elem_classes=\"secondary-btn\")\n", + " \n", + " # Tips Section\n", + " with gr.Row(elem_classes=\"tips-section\"):\n", + " gr.Markdown(\"\"\"\n", + " ### 💡 What Can I Help You With?\n", + " \n", + " - **Technology & Programming**: Debug code, explain concepts, build projects, learn new languages\n", + " - **Creative Writing**: Stories, poems, scripts, brainstorming ideas, character development\n", + " - **Education & Learning**: Homework help, concept explanations, study guides, tutoring\n", + " - **Business & Career**: Resume writing, business plans, marketing ideas, career advice\n", + " - **Science & Math**: Problem-solving, research assistance, concept explanations\n", + " - **Daily Life**: Recipe suggestions, travel planning, health tips, relationship advice\n", + " - **Entertainment**: Jokes, trivia, games, recommendations for books/movies/music\n", + " - **And Literally Anything Else**: No topic is off-limits - just ask!\n", + " \"\"\")\n", + " \n", + " # Event handlers\n", + " def process_message(message, history, model, temp, audio_enabled):\n", + " \"\"\"Process message and optionally generate audio\"\"\"\n", + " # Add user message to history\n", + " history = history + [{\"role\": \"user\", \"content\": message}]\n", + " \n", + " # Generate text response (streaming)\n", + " bot_message = None\n", + " for response in chat_streaming(message, history[:-1], model, temp):\n", + " bot_message = response\n", + " yield history + [{\"role\": \"assistant\", \"content\": response}], None\n", + " \n", + " # Final history with complete response\n", + " final_history = history + [{\"role\": \"assistant\", \"content\": bot_message}]\n", + " \n", + " # Generate audio if enabled\n", + " if audio_enabled and bot_message:\n", + " audio_path = text_to_speech(bot_message)\n", + " yield final_history, audio_path\n", + " else:\n", + " yield final_history, None\n", + " \n", + " def transcribe_and_send(audio, history, model, temp, audio_enabled):\n", + " \"\"\"Transcribe audio and process message\"\"\"\n", + " text = handle_audio_input(audio)\n", + " if text and text != \"\" and not text.startswith(\"Error\"):\n", + " # Process the message and get results\n", + " for hist, aud in 
process_message(text, history, model, temp, audio_enabled):\n", + " yield hist, aud\n", + " else:\n", + " # If no text or error, return history unchanged\n", + " yield history, None\n", + "\n", + " # Wire up events\n", + " submit_btn.click(\n", + " fn=process_message,\n", + " inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " ).then(lambda: \"\", None, msg)\n", + "\n", + " msg.submit(\n", + " fn=process_message,\n", + " inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " ).then(lambda: \"\", None, msg)\n", + "\n", + " # Audio input handler using stop_recording event\n", + " audio_input.stop_recording(\n", + " fn=transcribe_and_send,\n", + " inputs=[audio_input, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " )\n", + "\n", + " # Clear button clears chat, audio response, and audio input\n", + " clear_btn.click(\n", + " fn=lambda: ([], None, None), \n", + " inputs=None, \n", + " outputs=[chatbot, audio_response, audio_input]\n", + " )\n", + "\n", + " # Toggle audio response visibility\n", + " audio_output_enabled.change(\n", + " fn=lambda x: gr.Audio(visible=x),\n", + " inputs=audio_output_enabled,\n", + " outputs=audio_response\n", + " )\n", + "\n", + "# Launch the app\n", + "if __name__ == \"__main__\":\n", + " demo.launch(share=False, debug=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}