# week1 -> day1
import os
from dotenv import load_dotenv
from openai import OpenAI

# week2 -> day2
import gradio as gr

load_dotenv(override=True)
# os.getenv returns Optional[str] — None when OPENAI_API_KEY is unset,
# so the annotation must not promise a plain str.
api_key: str | None = os.getenv('OPENAI_API_KEY')


class SolveTechnicalQuestions:
    """Voice-enabled chat assistant backed by OpenAI-compatible endpoints.

    Two separate clients are kept on purpose:
      * ``openai_client`` — audio features (TTS via ``talker`` and
        Whisper transcription via ``audio_to_text``); retargetable with
        ``set_endpoint``.
      * ``_chat_llm`` — chat completions; switchable between hosted GPT and a
        local Ollama server with ``set_model``.
    """

    # NOTE(review): this default prompt describes a snarky *website summarizer*,
    # which does not match the class name "SolveTechnicalQuestions" — confirm
    # the intended persona. (Typo "snarkyassistant" fixed to "snarky assistant".)
    _system_prompt = """
    You are a snarky assistant that analyzes the contents of a website, 
    and provides a short, snarky, humorous summary, ignoring text that might be navigation related.
    Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
    """
    _stream = False

    def __init__(self, model: str = "gpt-4o-mini") -> None:
        """Create the default audio and chat clients.

        Args:
            model: chat-completion model name used by ``chat`` (default
                ``"gpt-4o-mini"``).
        """
        self.openai_client = OpenAI()  # audio: text-to-speech + Whisper
        self._chat_llm = OpenAI()      # chat completions
        self._MODEL = model

    def set_system_prompt(self, system_prompt: str) -> None:
        """Override the class-level system prompt on this instance."""
        self._system_prompt = system_prompt

    def set_stream(self, stream: bool) -> None:
        """Toggle streaming mode for ``chat`` (False = single response)."""
        self._stream = stream

    def set_endpoint(self, endpoint: str, api_key: str = "ollama") -> None:
        """Point the *audio* client at another OpenAI-compatible endpoint.

        Args:
            endpoint: base URL of the API (e.g. a local Ollama server).
            api_key: credential for that endpoint; Ollama accepts any string.
        """
        self.openai_client = OpenAI(base_url=endpoint, api_key=api_key)

    def set_model(self, model: str) -> None:
        """Switch the chat backend.

        ``"GPT"`` selects hosted ``gpt-4o-mini``; ``"ollama"`` selects a local
        ``llama3.2`` served at ``http://localhost:11434/v1``. Any other value
        is ignored. No-op when the requested model is already active.
        """
        if model == "GPT" and self._MODEL != "gpt-4o-mini":
            self._chat_llm = OpenAI()
            self._MODEL = "gpt-4o-mini"
        elif model == "ollama" and self._MODEL != "llama3.2":
            self._chat_llm = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
            self._MODEL = "llama3.2"

    def talker(self, message):
        """Synthesize speech for ``message`` and return the raw audio bytes."""
        response = self.openai_client.audio.speech.create(
            model="gpt-4o-mini-tts",
            voice="onyx",  # Also, try replacing onyx with alloy or coral
            input=message
        )
        return response.content

    def initiate_chat(self, message, history):
        """Append the user's turn to ``history``.

        Returns ``("", new_history)`` — the empty string clears the Gradio
        textbox while the chatbot component receives the updated history.
        """
        return "", history + [{"role": "user", "content": message}]

    def audio_to_text(self, audio_file, history):
        """Convert an audio file to text using OpenAI Whisper.

        Args:
            audio_file: a file path (str) or file-like object from Gradio.
            history: current chat history (list of role/content dicts).

        Returns:
            ``history`` plus one user turn whose content is either the
            transcription or an explanatory fallback message. Never raises:
            all failures degrade to "No speech detected in audio".
        """
        result = history + [{"role": "user", "content": ""}]
        try:
            if audio_file is None:
                print("No audio file provided")
                result[-1]["content"] = "No speech detected in audio"
                return result

            # Ensure we have the file path
            if isinstance(audio_file, str):
                file_path = audio_file
            else:
                file_path = audio_file.name if hasattr(audio_file, 'name') else str(audio_file)

            # Check if file exists
            if not os.path.exists(file_path):
                print(f"Audio file not found: {file_path}")
                result[-1]["content"] = "No speech detected in audio"
                return result

            # Check file size (Whisper has limits)
            file_size = os.path.getsize(file_path)
            if file_size > 25 * 1024 * 1024:  # 25MB limit
                result[-1]["content"] = "Audio file too large (max 25MB)"
                return result

            # Transcribe using OpenAI Whisper
            with open(file_path, "rb") as audio:
                response = self.openai_client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio,
                    response_format="text"
                )

            # Clean up the transcribed text
            text = response.strip()

            if not text:
                result[-1]["content"] = "No speech detected in audio"
                return result

            result[-1]["content"] = text
            return result

        except Exception as e:
            # Deliberate best-effort: transcription failure must not crash the UI.
            error_msg = f"Audio transcription error: {str(e)}"
            print(f"{error_msg}")
            result[-1]["content"] = "No speech detected in audio"
            return result

    def chat(self, history):
        """Generator: run a chat completion and yield (history, audio) pairs.

        In streaming mode, yields partial histories as tokens arrive (with
        ``None`` audio), then a final yield with synthesized speech. In
        non-streaming mode, yields once with the full reply and its audio.
        """
        # Normalize history to plain role/content dicts (drops any extra keys
        # Gradio may attach).
        history = [{"role": h["role"], "content": h["content"]} for h in history]
        messages = [{"role": "system", "content": self._system_prompt}] + history
        # Named `completion` rather than `stream`: it is only a stream iterator
        # when self._stream is True; otherwise it is a plain response object.
        completion = self._chat_llm.chat.completions.create(
            model=self._MODEL, messages=messages, stream=self._stream
        )

        if self._stream:
            response = ""
            voice = None

            for chunk in completion:
                # Guard: some stream chunks (e.g. the final usage chunk) carry
                # an empty `choices` list — indexing [0] unguarded would raise.
                if chunk.choices and chunk.choices[0].delta.content:
                    response += chunk.choices[0].delta.content
                    temp_history = history + [{"role": "assistant", "content": response}]
                    yield temp_history, voice

            voice = self.talker(response)
            history += [{"role": "assistant", "content": response}]
            yield history, voice
        else:
            response = completion.choices[0].message.content
            history += [{"role": "assistant", "content": response}]
            voice = self.talker(response)
            yield history, voice
\n", " {desc}\n", "
\n", "