LLM_Engineering_OLD/week2/community-contributions/week2-exercise-translator.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7563a171",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
   "metadata": {},
   "source": [
    "# Exercise - week 2: German translator\n",
    "\n",
    "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
    "\n",
    "The assistant will transform your spoken English to text, then translate it German and speak it out."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install first PortAudio, in MacOS\n",
    "# brew install portaudio\n",
    "\n",
    "!pip install openai speechrecognition pyaudio\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dcae50aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1796b554",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialization\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "if openai_api_key:\n",
    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"OpenAI API Key not set\")\n",
    "    \n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c5caad24",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_message = \"\"\"You are a highly skilled language translator specializing in translating English text to German. \n",
    "Your task is to accurately translate any English text provided by the user into German. \n",
    "Ensure that the translations are grammatically correct and contextually appropriate. \n",
    "If the user provides a phrase, sentence, or paragraph in English, respond with the equivalent translation in German.\"\"\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "aca69563",
   "metadata": {},
   "outputs": [],
   "source": [
    "import speech_recognition as sr\n",
    "\n",
    "\n",
    "def recognize_speech():\n",
    "    recognizer = sr.Recognizer()\n",
    "    with sr.Microphone() as source:\n",
    "        print(\"Say something...\")\n",
    "        audio = recognizer.listen(source)\n",
    "    try:\n",
    "        text = recognizer.recognize_google(audio)\n",
    "        print(f\"You said: {text}\")\n",
    "        return text\n",
    "    except sr.UnknownValueError:\n",
    "        print(\"Google Speech Recognition could not understand audio\")\n",
    "        return None\n",
    "    except sr.RequestError as e:\n",
    "        print(f\"Could not request results from Google Speech Recognition service; {e}\")\n",
    "        return None\n",
    "\n",
    "def recognize_speech(audio_file):\n",
    "    recognizer = sr.Recognizer()\n",
    "    with sr.AudioFile(audio_file) as source:\n",
    "        audio = recognizer.record(source)\n",
    "    try:\n",
    "        text = recognizer.recognize_google(audio)\n",
    "        return text\n",
    "    except sr.UnknownValueError:\n",
    "        return \"Google Speech Recognition could not understand audio\"\n",
    "    except sr.RequestError as e:\n",
    "        return f\"Could not request results from Google Speech Recognition service; {e}\"\n",
    "\n",
    "\n",
    "def get_chatgpt_response(message):\n",
    "    response = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages = \n",
    "            [{\"role\": \"system\", \"content\": system_message},\n",
    "            {\"role\": \"user\", \"content\": message}],\n",
    "        max_tokens=150\n",
    "    )\n",
    "    return response.choices[0].message.content.strip()\n",
    "\n",
    "# If problem to find microphone, upload voice file\n",
    "# To record a wav-file you can use Audacity:\n",
    "# brew install --cask audacity\n",
    "\n",
    "def process_audio(audio_file):\n",
    "    text = recognize_speech(audio_file)\n",
    "    if text:\n",
    "        response = get_chatgpt_response(text)\n",
    "        return response\n",
    "    return \"Could not recognize speech.\"\n",
    "\n",
    "# This is the microphone version:\n",
    "# \n",
    "# def process_audio():\n",
    "#     text = recognize_speech()\n",
    "#     if text:\n",
    "#         response = get_chatgpt_response(text)\n",
    "#         return response\n",
    "#     return \"Could not recognize speech.\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1118141",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create Gradio interface\n",
    "iface = gr.Interface(\n",
    "    fn=process_audio,\n",
    "    inputs=gr.Audio(type=\"filepath\"),\n",
    "    outputs=\"text\",\n",
    "    live=True, \n",
    ")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    iface.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1284da5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv313",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}