Gradio UI with recording voice and translating it

2025-02-10 11:07:16 +02:00
parent 4bad3a828f
commit a11f9e7e51
1 changed files with 203 additions and 0 deletions
--- a/week2/community-contributions/week2-exercise-translator.ipynb
+++ b/week2/community-contributions/week2-exercise-translator.ipynb
@@ -0,0 +1,203 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7563a171",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
+   "metadata": {},
+   "source": [
+    "# Exercise - week 2: German translator\n",
+    "\n",
+    "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
+    "\n",
+    "The assistant will transcribe your spoken English to text, then translate it into German and speak it out."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install PortAudio first (PyAudio needs it). On macOS:\n",
+    "#   brew install portaudio\n",
+    "\n",
+    "# %pip (instead of !pip) installs into the environment of the running kernel.\n",
+    "%pip install openai speechrecognition pyaudio\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcae50aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "\n",
+    "import os\n",
+    "import json\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "import gradio as gr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1796b554",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialization: load the environment, report the API key status, create the client\n",
+    "\n",
+    "load_dotenv()\n",
+    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+    "\n",
+    "# Only a short prefix of the key is echoed, never the whole value.\n",
+    "print(\n",
+    "    f\"OpenAI API Key exists and begins {openai_api_key[:8]}\"\n",
+    "    if openai_api_key\n",
+    "    else \"OpenAI API Key not set\"\n",
+    ")\n",
+    "\n",
+    "MODEL = \"gpt-4o-mini\"\n",
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c5caad24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# System prompt sent with every request: instructs the model to translate English into German.\n",
+    "system_message = (\n",
+    "    \"You are a highly skilled language translator specializing in translating English text to German. \\n\"\n",
+    "    \"Your task is to accurately translate any English text provided by the user into German. \\n\"\n",
+    "    \"Ensure that the translations are grammatically correct and contextually appropriate. \\n\"\n",
+    "    \"If the user provides a phrase, sentence, or paragraph in English, respond with the equivalent translation in German.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "aca69563",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import speech_recognition as sr\n",
+    "\n",
+    "\n",
+    "def recognize_speech_from_microphone():\n",
+    "    \"\"\"Record one utterance from the microphone and transcribe it.\n",
+    "\n",
+    "    This function has its own name because a second function called\n",
+    "    ``recognize_speech`` (taking an audio file) is defined later in this\n",
+    "    cell; sharing the name would rebind it and make this version unreachable.\n",
+    "\n",
+    "    Returns:\n",
+    "        The recognized text, or None when the audio could not be understood\n",
+    "        or the request to Google Speech Recognition failed.\n",
+    "    \"\"\"\n",
+    "    recognizer = sr.Recognizer()\n",
+    "    with sr.Microphone() as source:\n",
+    "        print(\"Say something...\")\n",
+    "        audio = recognizer.listen(source)\n",
+    "    try:\n",
+    "        text = recognizer.recognize_google(audio)\n",
+    "    except sr.UnknownValueError:\n",
+    "        print(\"Google Speech Recognition could not understand audio\")\n",
+    "        return None\n",
+    "    except sr.RequestError as e:\n",
+    "        print(f\"Could not request results from Google Speech Recognition service; {e}\")\n",
+    "        return None\n",
+    "    print(f\"You said: {text}\")\n",
+    "    return text\n",
+    "\n",
+    "def recognize_speech(audio_file):\n",
+    "    \"\"\"Transcribe an audio file with Google Speech Recognition.\n",
+    "\n",
+    "    Args:\n",
+    "        audio_file: Path of the audio file to transcribe (opened with sr.AudioFile).\n",
+    "\n",
+    "    Returns:\n",
+    "        The recognized text, or None when recognition fails. Failures are\n",
+    "        printed rather than returned, so a caller can never mistake an\n",
+    "        error message for the user's speech and pass it on for translation.\n",
+    "    \"\"\"\n",
+    "    recognizer = sr.Recognizer()\n",
+    "    with sr.AudioFile(audio_file) as source:\n",
+    "        audio = recognizer.record(source)\n",
+    "    try:\n",
+    "        return recognizer.recognize_google(audio)\n",
+    "    except sr.UnknownValueError:\n",
+    "        print(\"Google Speech Recognition could not understand audio\")\n",
+    "    except sr.RequestError as e:\n",
+    "        print(f\"Could not request results from Google Speech Recognition service; {e}\")\n",
+    "    return None\n",
+    "\n",
+    "\n",
+    "def get_chatgpt_response(message, max_tokens=150):\n",
+    "    \"\"\"Send ``message`` to the chat model together with ``system_message``.\n",
+    "\n",
+    "    Args:\n",
+    "        message: Text sent as the user message.\n",
+    "        max_tokens: Upper bound on the length of the reply. The default of\n",
+    "            150 keeps the previous hard-coded limit; raise it for longer\n",
+    "            passages, otherwise the reply is cut off at the limit.\n",
+    "\n",
+    "    Returns:\n",
+    "        The reply text with surrounding whitespace removed, or an empty\n",
+    "        string when the response carries no text content.\n",
+    "    \"\"\"\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=MODEL,\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_message},\n",
+    "            {\"role\": \"user\", \"content\": message},\n",
+    "        ],\n",
+    "        max_tokens=max_tokens,\n",
+    "    )\n",
+    "    # content is optional in the API response; guard so .strip() is never called on None\n",
+    "    content = response.choices[0].message.content\n",
+    "    return (content or \"\").strip()\n",
+    "\n",
+    "# If the microphone cannot be found, upload a voice file instead.\n",
+    "# To record a WAV file you can use Audacity:\n",
+    "# brew install --cask audacity\n",
+    "\n",
+    "def process_audio(audio_file):\n",
+    "    \"\"\"Transcribe an audio file and return the chat model's response to it.\n",
+    "\n",
+    "    Args:\n",
+    "        audio_file: Path of the recorded or uploaded audio, or None when\n",
+    "            the Gradio audio component is empty (for example after it has\n",
+    "            been cleared).\n",
+    "\n",
+    "    Returns:\n",
+    "        The model's response for the recognized text, an empty string when\n",
+    "        no audio was supplied, or \"Could not recognize speech.\" when\n",
+    "        nothing was recognized.\n",
+    "    \"\"\"\n",
+    "    # Nothing to transcribe: avoid opening a missing file and clear the output instead.\n",
+    "    if not audio_file:\n",
+    "        return \"\"\n",
+    "    text = recognize_speech(audio_file)\n",
+    "    if not text:\n",
+    "        return \"Could not recognize speech.\"\n",
+    "    return get_chatgpt_response(text)\n",
+    "\n",
+    "# This is the microphone version:\n",
+    "# \n",
+    "# def process_audio():\n",
+    "#     text = recognize_speech()\n",
+    "#     if text:\n",
+    "#         response = get_chatgpt_response(text)\n",
+    "#         return response\n",
+    "#     return \"Could not recognize speech.\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1118141",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gradio front end: the recorded or uploaded audio is handed to process_audio\n",
+    "# as a file path and the returned text is displayed.\n",
+    "# live=True re-runs process_audio whenever the input changes.\n",
+    "iface = gr.Interface(process_audio, gr.Audio(type=\"filepath\"), \"text\", live=True)\n",
+    "\n",
+    "# In a notebook kernel __name__ is \"__main__\", so running this cell launches the UI.\n",
+    "if __name__ == \"__main__\":\n",
+    "    iface.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1284da5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv313",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}