Gradio UI for recording voice and translating it
This commit is contained in:
203
week2/community-contributions/week2-exercise-translator.ipynb
Normal file
203
week2/community-contributions/week2-exercise-translator.ipynb
Normal file
@@ -0,0 +1,203 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7563a171",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise - week 2: German translator\n",
|
||||
"\n",
|
||||
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
|
||||
"\n",
|
||||
"The assistant transcribes your spoken English to text and then translates it into German. The translation is shown as text; speaking it aloud is not implemented in this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a07e7793-b8f5-44f4-aded-5562f633271a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# First install PortAudio (required by PyAudio); on macOS:\n",
|
||||
"# brew install portaudio\n",
|
||||
"\n",
|
||||
"!pip install openai speechrecognition pyaudio\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dcae50aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import gradio as gr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1796b554",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialization\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"if openai_api_key:\n",
|
||||
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
||||
"else:\n",
|
||||
" print(\"OpenAI API Key not set\")\n",
|
||||
" \n",
|
||||
"MODEL = \"gpt-4o-mini\"\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c5caad24",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_message = \"\"\"You are a highly skilled language translator specializing in translating English text to German. \n",
|
||||
"Your task is to accurately translate any English text provided by the user into German. \n",
|
||||
"Ensure that the translations are grammatically correct and contextually appropriate. \n",
|
||||
"If the user provides a phrase, sentence, or paragraph in English, respond with the equivalent translation in German.\"\"\" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "aca69563",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import speech_recognition as sr\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def recognize_speech():\n",
|
||||
" recognizer = sr.Recognizer()\n",
|
||||
" with sr.Microphone() as source:\n",
|
||||
" print(\"Say something...\")\n",
|
||||
" audio = recognizer.listen(source)\n",
|
||||
" try:\n",
|
||||
" text = recognizer.recognize_google(audio)\n",
|
||||
" print(f\"You said: {text}\")\n",
|
||||
" return text\n",
|
||||
" except sr.UnknownValueError:\n",
|
||||
" print(\"Google Speech Recognition could not understand audio\")\n",
|
||||
" return None\n",
|
||||
" except sr.RequestError as e:\n",
|
||||
" print(f\"Could not request results from Google Speech Recognition service; {e}\")\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
"def recognize_speech(audio_file):\n",
|
||||
" recognizer = sr.Recognizer()\n",
|
||||
" with sr.AudioFile(audio_file) as source:\n",
|
||||
" audio = recognizer.record(source)\n",
|
||||
" try:\n",
|
||||
" text = recognizer.recognize_google(audio)\n",
|
||||
" return text\n",
|
||||
" except sr.UnknownValueError:\n",
|
||||
" return \"Google Speech Recognition could not understand audio\"\n",
|
||||
" except sr.RequestError as e:\n",
|
||||
" return f\"Could not request results from Google Speech Recognition service; {e}\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_chatgpt_response(message):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=MODEL,\n",
|
||||
" messages = \n",
|
||||
" [{\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": message}],\n",
|
||||
" max_tokens=150\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content.strip()\n",
|
||||
"\n",
|
||||
"# If the microphone cannot be found, upload a recorded voice file instead.\n",
|
||||
"# To record a WAV file, you can use Audacity:\n",
|
||||
"# brew install --cask audacity\n",
|
||||
"\n",
|
||||
"def process_audio(audio_file):\n",
|
||||
" text = recognize_speech(audio_file)\n",
|
||||
" if text:\n",
|
||||
" response = get_chatgpt_response(text)\n",
|
||||
" return response\n",
|
||||
" return \"Could not recognize speech.\"\n",
|
||||
"\n",
|
||||
"# Microphone version (needs the no-argument recognize_speech(), which the file-based definition above replaces):\n",
|
||||
"# \n",
|
||||
"# def process_audio():\n",
|
||||
"# text = recognize_speech()\n",
|
||||
"# if text:\n",
|
||||
"# response = get_chatgpt_response(text)\n",
|
||||
"# return response\n",
|
||||
"# return \"Could not recognize speech.\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f1118141",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create Gradio interface\n",
|
||||
"iface = gr.Interface(\n",
|
||||
" fn=process_audio,\n",
|
||||
" inputs=gr.Audio(type=\"filepath\"),\n",
|
||||
" outputs=\"text\",\n",
|
||||
" live=True, \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" iface.launch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1284da5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv313",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user