From 33a12c846b300ac59cf09542bec861416d8eee60 Mon Sep 17 00:00:00 2001 From: Rohit Nain Date: Sat, 4 Oct 2025 23:09:34 +0530 Subject: [PATCH 1/2] created working voice_enabled_multi_model_AI_assistance --- ...ce_enabled_multi_model_AI_assistanve.ipynb | 646 ++++++++++++++++++ 1 file changed, 646 insertions(+) create mode 100644 week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb diff --git a/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb b/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb new file mode 100644 index 0000000..0345dfc --- /dev/null +++ b/week2/community-contributions/voice_enabled_multi_model_AI_assistanve.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "7bc4a9cd", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import gradio as gr\n", + "from openai import OpenAI\n", + "import anthropic\n", + "import google.generativeai as genai\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')\n", + "\n", + "# Initialize clients\n", + "openai_client = OpenAI(api_key=OPENAI_API_KEY)\n", + "genai.configure(api_key=google_api_key)\n", + "claude_client = anthropic.Anthropic(api_key=anthropic_api_key)\n", + "\n", + "# System prompt - Universal and comprehensive\n", + "SYSTEM_PROMPT = \"\"\"You are a highly capable and versatile AI assistant designed to help with any type of question or task.\n", + "\n", + "Your capabilities span across all domains including but not limited to:\n", + "- Programming, software development, and technology\n", + "- Science, mathematics, and engineering\n", + "- Arts, literature, and creative writing\n", + "- History, philosophy, and social sciences\n", + "- Business, finance, 
and economics\n", + "- Health, wellness, and lifestyle advice\n", + "- Education and learning support\n", + "- Problem-solving and critical thinking\n", + "- General knowledge and trivia\n", + "- Casual conversation and entertainment\n", + "\n", + "Guidelines:\n", + "- Provide accurate, helpful, and comprehensive responses\n", + "- Adapt your tone and style to match the context of the question\n", + "- Use examples and explanations when helpful\n", + "- Be creative when asked for creative content\n", + "- Be precise and factual when asked for information\n", + "- Ask clarifying questions if the request is ambiguous\n", + "- Admit when you're uncertain and provide the best possible guidance\n", + "- Be conversational, friendly, and supportive\n", + "\n", + "You can help with anything from technical coding problems to creative storytelling, from academic research to casual chat. There are no topic restrictions - feel free to engage with any subject matter the user brings up.\"\"\"\n", + "\n", + "# Model configurations\n", + "model_configs = {\n", + " \"GPT-4o\": {\"provider\": \"openai\", \"model\": \"gpt-4o\"},\n", + " \"GPT-4o-mini\": {\"provider\": \"openai\", \"model\": \"gpt-4o-mini\"},\n", + " \"GPT-3.5-turbo\": {\"provider\": \"openai\", \"model\": \"gpt-3.5-turbo\"},\n", + " \"Claude Sonnet 4\": {\"provider\": \"anthropic\", \"model\": \"claude-sonnet-4-20250514\"},\n", + " \"Gemini 2.0 Flash\": {\"provider\": \"google\", \"model\": \"gemini-2.0-flash-exp\"},\n", + "}\n", + "\n", + "def chat_streaming(message, history, model_name, temperature):\n", + " \"\"\"Main chat function with streaming support\"\"\"\n", + " \n", + " config = model_configs[model_name]\n", + " provider = config[\"provider\"]\n", + " model = config[\"model\"]\n", + " \n", + " # Convert messages format history to API format\n", + " messages = []\n", + " for msg in history:\n", + " if msg[\"role\"] == \"user\":\n", + " messages.append({\"role\": \"user\", \"content\": msg[\"content\"]})\n", 
+ " elif msg[\"role\"] == \"assistant\":\n", + " messages.append({\"role\": \"assistant\", \"content\": msg[\"content\"]})\n", + " messages.append({\"role\": \"user\", \"content\": message})\n", + " \n", + " # Stream based on provider\n", + " if provider == \"openai\":\n", + " stream = openai_client.chat.completions.create(\n", + " model=model,\n", + " messages=[{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + messages,\n", + " temperature=temperature,\n", + " stream=True\n", + " )\n", + " \n", + " response = \"\"\n", + " for chunk in stream:\n", + " if chunk.choices[0].delta.content:\n", + " response += chunk.choices[0].delta.content\n", + " yield response\n", + " \n", + " elif provider == \"anthropic\":\n", + " response = \"\"\n", + " with claude_client.messages.stream(\n", + " model=model,\n", + " max_tokens=2000,\n", + " temperature=temperature,\n", + " system=SYSTEM_PROMPT,\n", + " messages=messages,\n", + " ) as stream:\n", + " for text in stream.text_stream:\n", + " response += text\n", + " yield response\n", + " \n", + " elif provider == \"google\":\n", + " gemini = genai.GenerativeModel(\n", + " model_name=model,\n", + " system_instruction=SYSTEM_PROMPT,\n", + " )\n", + " \n", + " # Convert history for Gemini\n", + " gemini_history = []\n", + " for msg in history:\n", + " if msg[\"role\"] == \"user\":\n", + " gemini_history.append({\"role\": \"user\", \"parts\": [msg[\"content\"]]})\n", + " elif msg[\"role\"] == \"assistant\":\n", + " gemini_history.append({\"role\": \"model\", \"parts\": [msg[\"content\"]]})\n", + " \n", + " chat = gemini.start_chat(history=gemini_history)\n", + " \n", + " stream = chat.send_message(\n", + " message,\n", + " stream=True,\n", + " generation_config=genai.types.GenerationConfig(temperature=temperature)\n", + " )\n", + " \n", + " response = \"\"\n", + " for chunk in stream:\n", + " response += chunk.text\n", + " yield response\n", + "\n", + "def handle_audio_input(audio):\n", + " \"\"\"Transcribe audio input using 
Whisper\"\"\"\n", + " if audio is None:\n", + " return \"\"\n", + " \n", + " try:\n", + " audio_file = open(audio, \"rb\")\n", + " transcript = openai_client.audio.transcriptions.create(\n", + " model=\"whisper-1\",\n", + " file=audio_file\n", + " )\n", + " return transcript.text\n", + " except Exception as e:\n", + " return f\"Error transcribing audio: {str(e)}\"\n", + "\n", + "def text_to_speech(text):\n", + " \"\"\"Convert text response to speech\"\"\"\n", + " try:\n", + " response = openai_client.audio.speech.create(\n", + " model=\"tts-1\",\n", + " voice=\"nova\",\n", + " input=text[:4096] # Limit to prevent errors\n", + " )\n", + " \n", + " audio_path = \"response.mp3\"\n", + " response.stream_to_file(audio_path)\n", + " return audio_path\n", + " except Exception as e:\n", + " print(f\"TTS Error: {str(e)}\")\n", + " return None\n", + "\n", + "# Custom CSS for modern, attractive UI\n", + "custom_css = \"\"\"\n", + "@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');\n", + "\n", + ".gradio-container {\n", + " font-family: 'Inter', sans-serif !important;\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;\n", + "}\n", + "\n", + ":root .dark {\n", + " --background-fill-primary: #f0f0f0;\n", + " --body-background-fill: var(--background-fill-primary);\n", + " --block-background-fill: white !important;\n", + " --block-title-background-fill: #dfe7ff;\n", + " --block-title-text-color:#6366f1;\n", + " --body-text-color: black;\n", + " --button-secondary-text-color:black;\n", + " --input-background-fill:white;\n", + "\n", + " --block-label-background-fill:#dfe7ff;\n", + " --block-label-text-color:#6366f1;\n", + "\n", + " --block-border-color:#eaeaea;\n", + " --input-border-color: #eaeaea;\n", + " --border-color-primary:#eaeaea;\n", + "\n", + " --color-accent-soft: #dfe7ff;\n", + " --border-color-accent-subdued: #98a6cf;\n", + "\n", + " --checkbox-background-color: #eaeaea;\n", + " 
--checkbox-border-color: #eaeaea;\n", + " --background-fill-secondary:#eaeaea;\n", + "}\n", + "\n", + ".main {\n", + " background: white;\n", + " border-radius: 20px;\n", + " box-shadow: 0 20px 60px rgba(0,0,0,0.3);\n", + " padding: 0 !important;\n", + " overflow: hidden;\n", + "}\n", + "\n", + ".contain {\n", + " padding: 2rem !important;\n", + "}\n", + "\n", + "/* Header Styling */\n", + ".header-section {\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);\n", + " padding: 2.5rem 2rem;\n", + " border-radius: 20px 20px 0 0;\n", + " margin: -2rem 0rem 2rem 0rem;\n", + " color: white;\n", + " box-shadow: 0 4px 20px rgba(102, 126, 234, 0.4);\n", + "}\n", + "\n", + ".header-section h1 {\n", + " font-size: 2.5rem !important;\n", + " font-weight: 700 !important;\n", + " margin: 0 0 0.5rem 0 !important;\n", + " color: white !important;\n", + " text-shadow: 2px 2px 4px rgba(0,0,0,0.2);\n", + "}\n", + "\n", + ".header-section p {\n", + " font-size: 1.1rem !important;\n", + " margin: 0.5rem 0 !important;\n", + " color: rgba(255,255,255,0.95) !important;\n", + " font-weight: 400;\n", + "}\n", + "\n", + ".feature-badge {\n", + " display: inline-block;\n", + " background: rgba(255,255,255,0.2);\n", + " padding: 0.4rem 1rem;\n", + " border-radius: 20px;\n", + " margin: 0.3rem;\n", + " font-size: 0.9rem;\n", + " backdrop-filter: blur(10px);\n", + " border: 1px solid rgba(255,255,255,0.3);\n", + "}\n", + "\n", + "/* Sidebar Styling */\n", + ".control-panel {\n", + " background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);\n", + " padding: 1.5rem;\n", + " border-radius: 15px;\n", + " box-shadow: 0 4px 15px rgba(0,0,0,0.1);\n", + " height: 100%;\n", + "}\n", + "\n", + ".control-panel label {\n", + " font-weight: 600 !important;\n", + " font-size: 0.95rem !important;\n", + "}\n", + "\n", + "/* Dropdown Styling */\n", + ".dropdown-container select {\n", + " background: white !important;\n", + " border: 2px solid #e2e8f0 !important;\n", + " border-radius: 
10px !important;\n", + " padding: 0.75rem !important;\n", + " font-weight: 500 !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".dropdown-container select:hover {\n", + " border-color: #667eea !important;\n", + " box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;\n", + "}\n", + "\n", + "/* Slider Styling */\n", + "input[type=\"range\"] {\n", + " accent-color: #667eea !important;\n", + "}\n", + "\n", + "/* Button Styling */\n", + ".primary-btn {\n", + " background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;\n", + " border: none !important;\n", + " color: white !important;\n", + " font-weight: 600 !important;\n", + " padding: 0.75rem 2rem !important;\n", + " border-radius: 10px !important;\n", + " box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".primary-btn:hover {\n", + " transform: translateY(-2px) !important;\n", + " box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;\n", + "}\n", + "\n", + ".secondary-btn {\n", + " background: #e2e8f0 !important;\n", + " border: none !important;\n", + " color: #2d3748 !important;\n", + " font-weight: 600 !important;\n", + " padding: 0.75rem 1.5rem !important;\n", + " border-radius: 10px !important;\n", + " transition: all 0.3s ease !important;\n", + "}\n", + "\n", + ".secondary-btn:hover {\n", + " background: #cbd5e0 !important;\n", + " transform: translateY(-2px) !important;\n", + "}\n", + "\n", + "/* Chatbot Styling */\n", + ".chatbot-container {\n", + " background: white;\n", + " border-radius: 15px;\n", + " border: 2px solid #e2e8f0;\n", + " box-shadow: 0 4px 20px rgba(0,0,0,0.08);\n", + " overflow: hidden;\n", + "}\n", + "\n", + "/* Input Box Styling */\n", + ".message-input textarea {\n", + " border: 2px solid #e2e8f0 !important;\n", + " border-radius: 12px !important;\n", + " padding: 1rem !important;\n", + " font-size: 1rem !important;\n", + " transition: all 0.3s ease 
!important;\n", + "}\n", + "\n", + ".message-input textarea:focus {\n", + " border-color: #667eea !important;\n", + " box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;\n", + "}\n", + "\n", + "/* Input Row Centering */\n", + ".input-row {\n", + " display: flex !important;\n", + " justify-content: center !important;\n", + " align-items: center !important;\n", + " gap: 1rem !important;\n", + "}\n", + "\n", + ".input-row > * {\n", + " flex-shrink: 0 !important;\n", + "}\n", + "\n", + "/* Audio Components */\n", + ".audio-component {\n", + " background: #f7fafc;\n", + " border: 2px dashed #cbd5e0;\n", + " border-radius: 12px;\n", + " padding: 1rem;\n", + " transition: all 0.3s ease;\n", + "}\n", + "\n", + ".audio-component:hover {\n", + " border-color: #667eea;\n", + " background: #edf2f7;\n", + "}\n", + "\n", + "/* Checkbox Styling */\n", + ".checkbox-label {\n", + " display: flex;\n", + " align-items: center;\n", + " gap: 0.5rem;\n", + " font-weight: 500;\n", + " color: #2d3748;\n", + "}\n", + "\n", + "/* Tips Section */\n", + ".tips-section {\n", + " background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);\n", + " padding: 1.5rem;\n", + " border-radius: 15px;\n", + " margin-top: 2rem;\n", + " border-left: 4px solid #667eea;\n", + "}\n", + "\n", + ".tips-section h3 {\n", + " color: #667eea !important;\n", + " font-weight: 600 !important;\n", + " margin-bottom: 1rem !important;\n", + "}\n", + "\n", + ".tips-section ul {\n", + " list-style: none;\n", + " padding: 0;\n", + "}\n", + "\n", + ".tips-section li {\n", + " padding: 0.5rem 0;\n", + " color: #4a5568 !important;\n", + " font-size: 0.95rem;\n", + "}\n", + "\n", + ".tips-section li:before {\n", + " content: \"→ \";\n", + " color: #667eea;\n", + " font-weight: bold;\n", + " margin-right: 0.5rem;\n", + "}\n", + "\n", + "/* Force black color for strong/bold text */\n", + ".tips-section strong {\n", + " color: #1a202c !important;\n", + "}\n", + "\n", + ".prose * {\n", + " color: inherit !important;\n", + 
"}\n", + "\n", + ".prose strong {\n", + " color: #1a202c !important;\n", + " font-weight: 600 !important;\n", + "}\n", + "\n", + "/* Responsive Design */\n", + "@media (max-width: 768px) {\n", + " .header-section h1 {\n", + " font-size: 1.8rem !important;\n", + " }\n", + " \n", + " .contain {\n", + " padding: 1rem !important;\n", + " }\n", + "}\n", + "\n", + ".fillable{\n", + " max-width:95% !important;\n", + "}\n", + "#component-5{\n", + " flex-grow:1.1 !important;\n", + "}\n", + ".bubble-wrap.svelte-gjtrl6 {\n", + " background:none !important;\n", + "}\n", + ".bot.svelte-1csv61q.message {\n", + " background-color: white !important;\n", + " border: 1px solid #f3f3f3;\n", + "}\n", + ".options.svelte-y6qw75> li:hover{\n", + " background:white ;\n", + "}\n", + ".options.svelte-y6qw75> .selected{\n", + " background:white ;\n", + "}\n", + "\n", + "\"\"\"\n", + "\n", + "# Build Gradio Interface\n", + "with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:\n", + " \n", + " # Header\n", + " with gr.Row(elem_classes=\"header-section\"):\n", + " with gr.Column():\n", + " gr.HTML(\"\"\"\n", + "

🚀 Voice Enabled Multi Model AI-Assistant

\n", + "

Your intelligent companion for any question - from coding to creativity, science to storytelling!

\n", + "
\n", + " 🤖 7 AI Models\n", + " 🎤 Voice Input\n", + " 🔊 Audio Output\n", + " ⚡ Real-time Streaming\n", + " 🌐 Any Topic\n", + "
\n", + " \"\"\")\n", + " \n", + " with gr.Row():\n", + " # Left Sidebar - Controls\n", + " with gr.Column(scale=1, elem_classes=\"control-panel\"):\n", + " gr.HTML(\"

⚙️ Settings

\")\n", + " \n", + " model_choice = gr.Dropdown(\n", + " choices=list(model_configs.keys()),\n", + " value=\"GPT-4o-mini\",\n", + " label=\"🤖 AI Model\",\n", + " info=\"Select your preferred model\",\n", + " elem_classes=\"dropdown-container\"\n", + " )\n", + " \n", + " temperature = gr.Slider(\n", + " minimum=0,\n", + " maximum=1,\n", + " value=0.7,\n", + " step=0.1,\n", + " label=\"🌡️ Temperature\",\n", + " info=\"Higher = more creative responses\"\n", + " )\n", + " \n", + " gr.HTML(\"

🎙️ Audio Features

\")\n", + " \n", + " audio_input = gr.Audio(\n", + " sources=[\"microphone\"],\n", + " type=\"filepath\",\n", + " label=\"🎤 Voice Input\",\n", + " elem_classes=\"audio-component\"\n", + " )\n", + " \n", + " audio_output_enabled = gr.Checkbox(\n", + " label=\"🔊 Enable Audio Response\",\n", + " value=False,\n", + " elem_classes=\"checkbox-label\"\n", + " )\n", + " \n", + " # Right Side - Chat Interface\n", + " with gr.Column(scale=3):\n", + " chatbot = gr.Chatbot(\n", + " label=\"💬 Conversation\",\n", + " height=550,\n", + " show_copy_button=True,\n", + " type='messages',\n", + " elem_classes=\"chatbot-container\",\n", + " avatar_images=(None, \"https://api.dicebear.com/7.x/bottts/svg?seed=ai\")\n", + " )\n", + " \n", + " with gr.Row(elem_classes=\"input-row\"):\n", + " msg = gr.Textbox(\n", + " label=\"\",\n", + " placeholder=\"💭 Ask me anything - tech help, creative writing, life advice, science, history, or just chat!\",\n", + " scale=5,\n", + " elem_classes=\"message-input\",\n", + " show_label=False\n", + " )\n", + " submit_btn = gr.Button(\"Send 📤\", scale=1, elem_classes=\"primary-btn\")\n", + " \n", + " audio_response = gr.Audio(\n", + " label=\"🔊 Audio Response\", \n", + " visible=False,\n", + " elem_classes=\"audio-component\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " clear_btn = gr.Button(\"🗑️ Clear Chat\", elem_classes=\"secondary-btn\")\n", + " \n", + " # Tips Section\n", + " with gr.Row(elem_classes=\"tips-section\"):\n", + " gr.Markdown(\"\"\"\n", + " ### 💡 What Can I Help You With?\n", + " \n", + " - **Technology & Programming**: Debug code, explain concepts, build projects, learn new languages\n", + " - **Creative Writing**: Stories, poems, scripts, brainstorming ideas, character development\n", + " - **Education & Learning**: Homework help, concept explanations, study guides, tutoring\n", + " - **Business & Career**: Resume writing, business plans, marketing ideas, career advice\n", + " - **Science & Math**: Problem-solving, research 
assistance, concept explanations\n", + " - **Daily Life**: Recipe suggestions, travel planning, health tips, relationship advice\n", + " - **Entertainment**: Jokes, trivia, games, recommendations for books/movies/music\n", + " - **And Literally Anything Else**: No topic is off-limits - just ask!\n", + " \"\"\")\n", + " \n", + " # Event handlers\n", + " def process_message(message, history, model, temp, audio_enabled):\n", + " \"\"\"Process message and optionally generate audio\"\"\"\n", + " # Add user message to history\n", + " history = history + [{\"role\": \"user\", \"content\": message}]\n", + " \n", + " # Generate text response (streaming)\n", + " bot_message = None\n", + " for response in chat_streaming(message, history[:-1], model, temp):\n", + " bot_message = response\n", + " yield history + [{\"role\": \"assistant\", \"content\": response}], None\n", + " \n", + " # Final history with complete response\n", + " final_history = history + [{\"role\": \"assistant\", \"content\": bot_message}]\n", + " \n", + " # Generate audio if enabled\n", + " if audio_enabled and bot_message:\n", + " audio_path = text_to_speech(bot_message)\n", + " yield final_history, audio_path\n", + " else:\n", + " yield final_history, None\n", + " \n", + " def transcribe_and_send(audio, history, model, temp, audio_enabled):\n", + " \"\"\"Transcribe audio and process message\"\"\"\n", + " text = handle_audio_input(audio)\n", + " if text and text != \"\" and not text.startswith(\"Error\"):\n", + " # Process the message and get results\n", + " for hist, aud in process_message(text, history, model, temp, audio_enabled):\n", + " yield hist, aud\n", + " else:\n", + " # If no text or error, return history unchanged\n", + " yield history, None\n", + "\n", + " # Wire up events\n", + " submit_btn.click(\n", + " fn=process_message,\n", + " inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " ).then(lambda: \"\", None, msg)\n", + 
"\n", + " msg.submit(\n", + " fn=process_message,\n", + " inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " ).then(lambda: \"\", None, msg)\n", + "\n", + " # Audio input handler using stop_recording event\n", + " audio_input.stop_recording(\n", + " fn=transcribe_and_send,\n", + " inputs=[audio_input, chatbot, model_choice, temperature, audio_output_enabled],\n", + " outputs=[chatbot, audio_response]\n", + " )\n", + "\n", + " # Clear button clears chat, audio response, and audio input\n", + " clear_btn.click(\n", + " fn=lambda: ([], None, None), \n", + " inputs=None, \n", + " outputs=[chatbot, audio_response, audio_input]\n", + " )\n", + "\n", + " # Toggle audio response visibility\n", + " audio_output_enabled.change(\n", + " fn=lambda x: gr.Audio(visible=x),\n", + " inputs=audio_output_enabled,\n", + " outputs=audio_response\n", + " )\n", + "\n", + "# Launch the app\n", + "if __name__ == \"__main__\":\n", + " demo.launch(share=False, debug=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 00406349001906639d45181d7de58b9e9fd24090 Mon Sep 17 00:00:00 2001 From: Rohit Nain Date: Sun, 5 Oct 2025 00:33:53 +0530 Subject: [PATCH 2/2] added hugging face files --- .../.gitattributes | 35 + .../README.md | 13 + .../Voice_Enabled_Multi_Model_AI_Assistant.py | 620 ++++++++++++++++++ .../requirements.txt | 5 + 4 files changed, 673 insertions(+) create mode 100644 week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/.gitattributes create mode 100644 
week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/README.md create mode 100644 week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/Voice_Enabled_Multi_Model_AI_Assistant.py create mode 100644 week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/requirements.txt diff --git a/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/.gitattributes b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text 
+*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/README.md b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/README.md new file mode 100644 index 0000000..faccc29 --- /dev/null +++ b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/README.md @@ -0,0 +1,13 @@ +--- +title: Voice Enabled Multi Model AI Assistant +emoji: 💻 +colorFrom: gray +colorTo: gray +sdk: gradio +sdk_version: 5.49.0 +app_file: Voice_Enabled_Multi_Model_AI_Assistant.py +pinned: false +license: apache-2.0 +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/Voice_Enabled_Multi_Model_AI_Assistant.py b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/Voice_Enabled_Multi_Model_AI_Assistant.py new file mode 100644 index 0000000..202577d --- /dev/null +++ b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/Voice_Enabled_Multi_Model_AI_Assistant.py @@ -0,0 +1,620 @@ +import os +import gradio as gr +from openai import OpenAI +import anthropic +import google.generativeai as genai +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +anthropic_api_key = os.getenv('ANTHROPIC_API_KEY') +google_api_key = os.getenv('GOOGLE_API_KEY') + +# Verify API keys are loaded +if not OPENAI_API_KEY: + raise ValueError("OPENAI_API_KEY not found in environment variables") +if not anthropic_api_key: + raise ValueError("ANTHROPIC_API_KEY not found in environment variables") +if not google_api_key: + raise ValueError("GOOGLE_API_KEY not found in environment variables") + +# Initialize clients 
+openai_client = OpenAI(api_key=OPENAI_API_KEY) +genai.configure(api_key=google_api_key) +claude_client = anthropic.Anthropic(api_key=anthropic_api_key) + +# System prompt - Universal and comprehensive +SYSTEM_PROMPT = """You are a highly capable and versatile AI assistant designed to help with any type of question or task. + +Your capabilities span across all domains including but not limited to: +- Programming, software development, and technology +- Science, mathematics, and engineering +- Arts, literature, and creative writing +- History, philosophy, and social sciences +- Business, finance, and economics +- Health, wellness, and lifestyle advice +- Education and learning support +- Problem-solving and critical thinking +- General knowledge and trivia +- Casual conversation and entertainment + +Guidelines: +- Provide accurate, helpful, and comprehensive responses +- Adapt your tone and style to match the context of the question +- Use examples and explanations when helpful +- Be creative when asked for creative content +- Be precise and factual when asked for information +- Ask clarifying questions if the request is ambiguous +- Admit when you're uncertain and provide the best possible guidance +- Be conversational, friendly, and supportive + +You can help with anything from technical coding problems to creative storytelling, from academic research to casual chat. 
There are no topic restrictions - feel free to engage with any subject matter the user brings up.""" + +# Model configurations +model_configs = { + "GPT-4o": {"provider": "openai", "model": "gpt-4o"}, + "GPT-4o-mini": {"provider": "openai", "model": "gpt-4o-mini"}, + "GPT-3.5-turbo": {"provider": "openai", "model": "gpt-3.5-turbo"}, + "Claude Sonnet 4": {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}, + "Gemini 2.0 Flash": {"provider": "google", "model": "gemini-2.0-flash-exp"}, +} + +def chat_streaming(message, history, model_name, temperature): + """Main chat function with streaming support""" + + config = model_configs[model_name] + provider = config["provider"] + model = config["model"] + + # Convert messages format history to API format + messages = [] + for msg in history: + if msg["role"] == "user": + messages.append({"role": "user", "content": msg["content"]}) + elif msg["role"] == "assistant": + messages.append({"role": "assistant", "content": msg["content"]}) + messages.append({"role": "user", "content": message}) + + # Stream based on provider + if provider == "openai": + stream = openai_client.chat.completions.create( + model=model, + messages=[{"role": "system", "content": SYSTEM_PROMPT}] + messages, + temperature=temperature, + stream=True + ) + + response = "" + for chunk in stream: + if chunk.choices[0].delta.content: + response += chunk.choices[0].delta.content + yield response + + elif provider == "anthropic": + response = "" + with claude_client.messages.stream( + model=model, + max_tokens=2000, + temperature=temperature, + system=SYSTEM_PROMPT, + messages=messages, + ) as stream: + for text in stream.text_stream: + response += text + yield response + + elif provider == "google": + gemini = genai.GenerativeModel( + model_name=model, + system_instruction=SYSTEM_PROMPT, + ) + + # Convert history for Gemini + gemini_history = [] + for msg in history: + if msg["role"] == "user": + gemini_history.append({"role": "user", "parts": 
[msg["content"]]}) + elif msg["role"] == "assistant": + gemini_history.append({"role": "model", "parts": [msg["content"]]}) + + chat = gemini.start_chat(history=gemini_history) + + stream = chat.send_message( + message, + stream=True, + generation_config=genai.types.GenerationConfig(temperature=temperature) + ) + + response = "" + for chunk in stream: + response += chunk.text + yield response + +def handle_audio_input(audio): + """Transcribe audio input using Whisper""" + if audio is None: + return "" + + try: + audio_file = open(audio, "rb") + transcript = openai_client.audio.transcriptions.create( + model="whisper-1", + file=audio_file + ) + return transcript.text + except Exception as e: + return f"Error transcribing audio: {str(e)}" + +def text_to_speech(text): + """Convert text response to speech""" + try: + response = openai_client.audio.speech.create( + model="tts-1", + voice="nova", + input=text[:4096] # Limit to prevent errors + ) + + audio_path = "response.mp3" + response.stream_to_file(audio_path) + return audio_path + except Exception as e: + print(f"TTS Error: {str(e)}") + return None + +# Custom CSS for modern, attractive UI +custom_css = """ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap'); + +.gradio-container { + font-family: 'Inter', sans-serif !important; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; +} + +:root .dark { + --background-fill-primary: #f0f0f0; + --body-background-fill: var(--background-fill-primary); + --block-background-fill: white !important; + --block-title-background-fill: #dfe7ff; + --block-title-text-color:#6366f1; + --body-text-color: black; + --button-secondary-text-color:black; + --input-background-fill:white; + + --block-label-background-fill:#dfe7ff; + --block-label-text-color:#6366f1; + + --block-border-color:#eaeaea; + --input-border-color: #eaeaea; + --border-color-primary:#eaeaea; + + --color-accent-soft: #dfe7ff; + 
--border-color-accent-subdued: #98a6cf; + + --checkbox-background-color: #eaeaea; + --checkbox-border-color: #eaeaea; + --background-fill-secondary:#eaeaea; +} + +.main { + background: white; + border-radius: 20px; + box-shadow: 0 20px 60px rgba(0,0,0,0.3); + padding: 0 !important; + overflow: hidden; +} + +.contain { + padding: 2rem !important; +} + +/* Header Styling */ +.header-section { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + padding: 2.5rem 2rem; + border-radius: 20px 20px 0 0; + margin: -2rem 0rem 2rem 0rem; + color: white; + box-shadow: 0 4px 20px rgba(102, 126, 234, 0.4); +} + +.header-section h1 { + font-size: 2.5rem !important; + font-weight: 700 !important; + margin: 0 0 0.5rem 0 !important; + color: white !important; + text-shadow: 2px 2px 4px rgba(0,0,0,0.2); +} + +.header-section p { + font-size: 1.1rem !important; + margin: 0.5rem 0 !important; + color: rgba(255,255,255,0.95) !important; + font-weight: 400; +} + +.feature-badge { + display: inline-block; + background: rgba(255,255,255,0.2); + padding: 0.4rem 1rem; + border-radius: 20px; + margin: 0.3rem; + font-size: 0.9rem; + backdrop-filter: blur(10px); + border: 1px solid rgba(255,255,255,0.3); +} + +/* Sidebar Styling */ +.control-panel { + background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); + padding: 1.5rem; + border-radius: 15px; + box-shadow: 0 4px 15px rgba(0,0,0,0.1); + height: 100%; +} + +.control-panel label { + font-weight: 600 !important; + font-size: 0.95rem !important; +} + +/* Dropdown Styling */ +.dropdown-container select { + background: white !important; + border: 2px solid #e2e8f0 !important; + border-radius: 10px !important; + padding: 0.75rem !important; + font-weight: 500 !important; + transition: all 0.3s ease !important; +} + +.dropdown-container select:hover { + border-color: #667eea !important; + box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important; +} + +/* Slider Styling */ +input[type="range"] { + accent-color: #667eea 
!important; +} + +/* Button Styling */ +.primary-btn { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; + border: none !important; + color: white !important; + font-weight: 600 !important; + padding: 0.75rem 2rem !important; + border-radius: 10px !important; + box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important; + transition: all 0.3s ease !important; +} + +.primary-btn:hover { + transform: translateY(-2px) !important; + box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important; +} + +.secondary-btn { + background: #e2e8f0 !important; + border: none !important; + color: #2d3748 !important; + font-weight: 600 !important; + padding: 0.75rem 1.5rem !important; + border-radius: 10px !important; + transition: all 0.3s ease !important; +} + +.secondary-btn:hover { + background: #cbd5e0 !important; + transform: translateY(-2px) !important; +} + +/* Chatbot Styling */ +.chatbot-container { + background: white; + border-radius: 15px; + border: 2px solid #e2e8f0; + box-shadow: 0 4px 20px rgba(0,0,0,0.08); + overflow: hidden; +} + +/* Input Box Styling */ +.message-input textarea { + border: 2px solid #e2e8f0 !important; + border-radius: 12px !important; + padding: 1rem !important; + font-size: 1rem !important; + transition: all 0.3s ease !important; +} + +.message-input textarea:focus { + border-color: #667eea !important; + box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important; +} + +/* Input Row Centering */ +.input-row { + display: flex !important; + justify-content: center !important; + align-items: center !important; + gap: 1rem !important; +} + +.input-row > * { + flex-shrink: 0 !important; +} + +/* Audio Components */ +.audio-component { + background: #f7fafc; + border: 2px dashed #cbd5e0; + border-radius: 12px; + padding: 1rem; + transition: all 0.3s ease; +} + +.audio-component:hover { + border-color: #667eea; + background: #edf2f7; +} + +/* Checkbox Styling */ +.checkbox-label { + display: flex; + align-items: center; + gap: 
0.5rem; + font-weight: 500; + color: #2d3748; +} + +/* Tips Section */ +.tips-section { + background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%); + padding: 1.5rem; + border-radius: 15px; + margin-top: 2rem; + border-left: 4px solid #667eea; +} + +.tips-section h3 { + color: #667eea !important; + font-weight: 600 !important; + margin-bottom: 1rem !important; +} + +.tips-section ul { + list-style: none; + padding: 0; +} + +.tips-section li { + padding: 0.5rem 0; + color: #4a5568 !important; + font-size: 0.95rem; +} + +.tips-section li:before { + content: "→ "; + color: #667eea; + font-weight: bold; + margin-right: 0.5rem; +} + +/* Force black color for strong/bold text */ +.tips-section strong { + color: #1a202c !important; +} + +.prose * { + color: inherit !important; +} + +.prose strong { + color: #1a202c !important; + font-weight: 600 !important; +} + +/* Responsive Design */ +@media (max-width: 768px) { + .header-section h1 { + font-size: 1.8rem !important; + } + + .contain { + padding: 1rem !important; + } +} + +.fillable{ + max-width:95% !important; +} +#component-5{ + flex-grow:1.1 !important; +} +.bubble-wrap.svelte-gjtrl6 { + background:none !important; +} +.bot.svelte-1csv61q.message { + background-color: white !important; + border: 1px solid #f3f3f3; +} +.options.svelte-y6qw75> li:hover{ + background:white ; +} +.options.svelte-y6qw75> .selected{ + background:white ; +} + +""" + +# Build Gradio Interface +with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo: + + # Header + with gr.Row(elem_classes="header-section"): + with gr.Column(): + gr.HTML(""" +

🚀 Voice-Enabled Multi-Model AI Assistant

+

Your intelligent companion for any question - from coding to creativity, science to storytelling!

+
+ 🤖 5 AI Models + 🎤 Voice Input + 🔊 Audio Output + ⚡ Real-time Streaming + 🌐 Any Topic +
+ """) + + with gr.Row(): + # Left Sidebar - Controls + with gr.Column(scale=1, elem_classes="control-panel"): + gr.HTML("

⚙️ Settings

") + + model_choice = gr.Dropdown( + choices=list(model_configs.keys()), + value="GPT-4o-mini", + label="🤖 AI Model", + info="Select your preferred model", + elem_classes="dropdown-container" + ) + + temperature = gr.Slider( + minimum=0, + maximum=1, + value=0.7, + step=0.1, + label="🌡️ Temperature", + info="Higher = more creative responses" + ) + + gr.HTML("

🎙️ Audio Features

") + + audio_input = gr.Audio( + sources=["microphone"], + type="filepath", + label="🎤 Voice Input", + elem_classes="audio-component" + ) + + audio_output_enabled = gr.Checkbox( + label="🔊 Enable Audio Response", + value=False, + elem_classes="checkbox-label" + ) + + # Right Side - Chat Interface + with gr.Column(scale=3): + chatbot = gr.Chatbot( + label="💬 Conversation", + height=550, + show_copy_button=True, + type='messages', + elem_classes="chatbot-container", + avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=ai") + ) + + with gr.Row(elem_classes="input-row"): + msg = gr.Textbox( + label="", + placeholder="💭 Ask me anything - tech help, creative writing, life advice, science, history, or just chat!", + scale=5, + elem_classes="message-input", + show_label=False + ) + submit_btn = gr.Button("Send 📤", scale=1, elem_classes="primary-btn") + + audio_response = gr.Audio( + label="🔊 Audio Response", + visible=False, + elem_classes="audio-component" + ) + + with gr.Row(): + clear_btn = gr.Button("🗑️ Clear Chat", elem_classes="secondary-btn") + + # Tips Section + with gr.Row(elem_classes="tips-section"): + gr.Markdown(""" + ### 💡 What Can I Help You With? + + - **Technology & Programming**: Debug code, explain concepts, build projects, learn new languages + - **Creative Writing**: Stories, poems, scripts, brainstorming ideas, character development + - **Education & Learning**: Homework help, concept explanations, study guides, tutoring + - **Business & Career**: Resume writing, business plans, marketing ideas, career advice + - **Science & Math**: Problem-solving, research assistance, concept explanations + - **Daily Life**: Recipe suggestions, travel planning, health tips, relationship advice + - **Entertainment**: Jokes, trivia, games, recommendations for books/movies/music + - **And Literally Anything Else**: No topic is off-limits - just ask! 
+ """) + + # Event handlers + def process_message(message, history, model, temp, audio_enabled): + """Process message and optionally generate audio""" + # Add user message to history + history = history + [{"role": "user", "content": message}] + + # Generate text response (streaming) + bot_message = None + for response in chat_streaming(message, history[:-1], model, temp): + bot_message = response + yield history + [{"role": "assistant", "content": response}], None + + # Final history with complete response + final_history = history + [{"role": "assistant", "content": bot_message}] + + # Generate audio if enabled + if audio_enabled and bot_message: + audio_path = text_to_speech(bot_message) + yield final_history, audio_path + else: + yield final_history, None + + def transcribe_and_send(audio, history, model, temp, audio_enabled): + """Transcribe audio and process message""" + text = handle_audio_input(audio) + if text and text != "" and not text.startswith("Error"): + # Process the message and get results + for hist, aud in process_message(text, history, model, temp, audio_enabled): + yield hist, aud + else: + # If no text or error, return history unchanged + yield history, None + + # Wire up events + submit_btn.click( + fn=process_message, + inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled], + outputs=[chatbot, audio_response] + ).then(lambda: "", None, msg) + + msg.submit( + fn=process_message, + inputs=[msg, chatbot, model_choice, temperature, audio_output_enabled], + outputs=[chatbot, audio_response] + ).then(lambda: "", None, msg) + + # Audio input handler using stop_recording event + audio_input.stop_recording( + fn=transcribe_and_send, + inputs=[audio_input, chatbot, model_choice, temperature, audio_output_enabled], + outputs=[chatbot, audio_response] + ) + + # Clear button clears chat, audio response, and audio input + clear_btn.click( + fn=lambda: ([], None, None), + inputs=None, + outputs=[chatbot, audio_response, audio_input] + ) + 
+ # Toggle audio response visibility + audio_output_enabled.change( + fn=lambda x: gr.Audio(visible=x), + inputs=audio_output_enabled, + outputs=audio_response + ) + +# Launch the app +if __name__ == "__main__": + demo.launch(share=False, debug=True) \ No newline at end of file diff --git a/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/requirements.txt b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/requirements.txt new file mode 100644 index 0000000..3ba6475 --- /dev/null +++ b/week2/community-contributions/Voice_Enabled_Multi_Model_AI_Assistant/requirements.txt @@ -0,0 +1,5 @@ +gradio +openai +anthropic +google-generativeai +python-dotenv \ No newline at end of file