204 lines
5.8 KiB
Plaintext
204 lines
5.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7563a171",
|
|
"metadata": {},
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Exercise - week 2: German translator\n",
|
|
"\n",
|
|
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
|
|
"\n",
|
|
"The assistant will convert your spoken English to text, then translate it into German and speak it out."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a07e7793-b8f5-44f4-aded-5562f633271a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Install first PortAudio, in MacOS\n",
|
|
"# brew install portaudio\n",
|
|
"\n",
|
|
"!pip install openai speechrecognition pyaudio\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dcae50aa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"\n",
|
|
"import os\n",
|
|
"import json\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from openai import OpenAI\n",
|
|
"import gradio as gr"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1796b554",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Initialization\n",
|
|
"\n",
|
|
"load_dotenv()\n",
|
|
"\n",
|
|
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
|
"if openai_api_key:\n",
|
|
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
|
"else:\n",
|
|
" print(\"OpenAI API Key not set\")\n",
|
|
" \n",
|
|
"MODEL = \"gpt-4o-mini\"\n",
|
|
"openai = OpenAI()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "c5caad24",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# System prompt passed as the \"system\" message of each chat completion request:\n",
|
|
"# it tells the model to act as an English-to-German translator and to answer\n",
|
|
"# with the German translation of whatever English text the user provides.\n",
|
|
"system_message = \"\"\"You are a highly skilled language translator specializing in translating English text to German. \n",
|
|
"Your task is to accurately translate any English text provided by the user into German. \n",
|
|
"Ensure that the translations are grammatically correct and contextually appropriate. \n",
|
|
"If the user provides a phrase, sentence, or paragraph in English, respond with the equivalent translation in German.\"\"\" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"id": "aca69563",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import speech_recognition as sr\n",
|
|
"\n",
|
|
"\n",
|
|
"def recognize_speech():\n",
|
|
" recognizer = sr.Recognizer()\n",
|
|
" with sr.Microphone() as source:\n",
|
|
" print(\"Say something...\")\n",
|
|
" audio = recognizer.listen(source)\n",
|
|
" try:\n",
|
|
" text = recognizer.recognize_google(audio)\n",
|
|
" print(f\"You said: {text}\")\n",
|
|
" return text\n",
|
|
" except sr.UnknownValueError:\n",
|
|
" print(\"Google Speech Recognition could not understand audio\")\n",
|
|
" return None\n",
|
|
" except sr.RequestError as e:\n",
|
|
" print(f\"Could not request results from Google Speech Recognition service; {e}\")\n",
|
|
" return None\n",
|
|
"\n",
|
|
"def recognize_speech(audio_file):\n",
|
|
" recognizer = sr.Recognizer()\n",
|
|
" with sr.AudioFile(audio_file) as source:\n",
|
|
" audio = recognizer.record(source)\n",
|
|
" try:\n",
|
|
" text = recognizer.recognize_google(audio)\n",
|
|
" return text\n",
|
|
" except sr.UnknownValueError:\n",
|
|
" return \"Google Speech Recognition could not understand audio\"\n",
|
|
" except sr.RequestError as e:\n",
|
|
" return f\"Could not request results from Google Speech Recognition service; {e}\"\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_chatgpt_response(message):\n",
|
|
" response = openai.chat.completions.create(\n",
|
|
" model=MODEL,\n",
|
|
" messages = \n",
|
|
" [{\"role\": \"system\", \"content\": system_message},\n",
|
|
" {\"role\": \"user\", \"content\": message}],\n",
|
|
" max_tokens=150\n",
|
|
" )\n",
|
|
" return response.choices[0].message.content.strip()\n",
|
|
"\n",
|
|
"# If the microphone cannot be found, upload a recorded voice file instead.\n",
|
|
"# To record a wav-file you can use Audacity:\n",
|
|
"# brew install --cask audacity\n",
|
|
"\n",
|
|
"def process_audio(audio_file):\n",
|
|
" text = recognize_speech(audio_file)\n",
|
|
" if text:\n",
|
|
" response = get_chatgpt_response(text)\n",
|
|
" return response\n",
|
|
" return \"Could not recognize speech.\"\n",
|
|
"\n",
|
|
"# This is the microphone version:\n",
|
|
"# \n",
|
|
"# def process_audio():\n",
|
|
"# text = recognize_speech()\n",
|
|
"# if text:\n",
|
|
"# response = get_chatgpt_response(text)\n",
|
|
"# return response\n",
|
|
"# return \"Could not recognize speech.\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f1118141",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create Gradio interface\n",
|
|
"iface = gr.Interface(\n",
|
|
" fn=process_audio,\n",
|
|
" inputs=gr.Audio(type=\"filepath\"),\n",
|
|
" outputs=\"text\",\n",
|
|
" live=True, \n",
|
|
")\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" iface.launch()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c1284da5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv313",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|