Added my contributions to community-contributions
227
week2/community-contributions/multi-modal-StudyAI.ipynb
Normal file
@@ -0,0 +1,227 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6aa646e3-7a57-461a-b69a-073179effa18",
"metadata": {},
"source": [
"## Additional End of Week Exercise - Week 2\n",
"\n",
"This includes:\n",
"- Gradio UI\n",
"- use of the system prompt to add expertise\n",
"- audio input so you can talk to it\n",
"- audio responses so it can talk back"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72f3dca4-b052-4e9f-90c8-f42e667c165c",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display, update_display\n",
"import gradio as gr\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23570b9f-8c7a-4cc7-b809-3505334b60a7",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables from a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"openai = OpenAI()\n",
"MODEL = 'gpt-4o-mini'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d379178a-8672-4e6f-a380-ad8d85f5c64e",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"\"\"You are a personal study tutor, designed to provide clear, brief and succinct answers to\n",
"students who ask you questions. The topics are related to data science, computer science\n",
"and technology in general, so you are allowed to use a moderate level of jargon. Explain in\n",
"simple terminology, so a student can easily understand.\n",
"\n",
"You may also be asked about prices for special courses. In this case, respond that you have no such\n",
"data available.\n",
"\n",
"\"\"\"\n",
"# Use a tabular format where possible\n",
"# for ease of information flow"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4745d439-c66e-4e5c-b5d4-9f0ba97aefdc",
"metadata": {},
"outputs": [],
"source": [
"def chat(history):\n",
"    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
"    response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
"\n",
"    reply = response.choices[0].message.content\n",
"    history += [{\"role\": \"assistant\", \"content\": reply}]\n",
"\n",
"    # talker() is defined in a later cell, so run that cell first.\n",
"    # Comment out or delete the next line if you'd rather skip audio for now.\n",
"    talker(reply)\n",
"\n",
"    return history"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8b31799-df86-4151-98ea-66ef50fe767e",
"metadata": {},
"outputs": [],
"source": [
"# Install openai-whisper for local speech-to-text transcription\n",
"# (note: whisper also needs ffmpeg available on your system to decode audio)\n",
"\n",
"!pip install openai-whisper"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f5b8e51-2833-44be-a4f4-63c4683f2b6e",
"metadata": {},
"outputs": [],
"source": [
"import whisper\n",
"\n",
"def transcribe_audio(audio):\n",
"    if audio is None:\n",
"        return \"No audio received.\"\n",
"\n",
"    model = whisper.load_model(\"base\")  # You can use \"tiny\", \"small\", etc.\n",
"    result = model.transcribe(audio)\n",
"\n",
"    return result[\"text\"]"
]
},
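{
"cell_type": "code",
"execution_count": null,
"id": "whisper-api-alternative-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional alternative (not part of the original notebook): a minimal sketch of\n",
"# transcribing with OpenAI's hosted Whisper API instead of the local model above.\n",
"# It reuses the `openai` client defined earlier; `transcribe_audio_api` is a\n",
"# hypothetical helper name introduced here for illustration.\n",
"\n",
"def transcribe_audio_api(audio):\n",
"    if audio is None:\n",
"        return \"No audio received.\"\n",
"\n",
"    with open(audio, \"rb\") as audio_file:\n",
"        transcript = openai.audio.transcriptions.create(model=\"whisper-1\", file=audio_file)\n",
"\n",
"    return transcript.text"
]
},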
{
"cell_type": "code",
"execution_count": null,
"id": "e55f8e43-2da1-4f2a-bcd4-3fffa830db48",
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"from io import BytesIO\n",
"from PIL import Image\n",
"from IPython.display import Audio, display\n",
"\n",
"def talker(message):\n",
"    response = openai.audio.speech.create(\n",
"        model=\"tts-1\",\n",
"        voice=\"onyx\",\n",
"        input=message)\n",
"\n",
"    audio_stream = BytesIO(response.content)\n",
"    output_filename = \"output_audio.mp3\"\n",
"    with open(output_filename, \"wb\") as f:\n",
"        f.write(audio_stream.read())\n",
"\n",
"    # Play the generated audio\n",
"    display(Audio(output_filename, autoplay=True))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb3107a7-bfdc-4255-825f-bfabcf458c0c",
"metadata": {},
"outputs": [],
"source": [
"# More involved Gradio code as we're not using the preset Chat interface!\n",
"# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
"\n",
"with gr.Blocks() as ui:\n",
"    with gr.Row():\n",
"        chatbot = gr.Chatbot(height=400, type=\"messages\")\n",
"    with gr.Row():\n",
"        entry = gr.Textbox(label=\"Chat with our StudyAI Assistant:\")\n",
"    # with gr.Row():\n",
"    #     entry = gr.Textbox(label=\"Speak or Type:\", placeholder=\"Speak your question...\", interactive=True, microphone=True)\n",
"    with gr.Row():\n",
"        audio_input = gr.Audio(type=\"filepath\", label=\"Speak your question\")\n",
"    with gr.Row():\n",
"        clear = gr.Button(\"Clear\")\n",
"\n",
"    def do_entry(message, history):\n",
"        history += [{\"role\": \"user\", \"content\": message}]\n",
"        return \"\", history\n",
"\n",
"    def handle_audio(audio, history):\n",
"        text = transcribe_audio(audio)\n",
"        history += [{\"role\": \"user\", \"content\": text}]\n",
"        return \"\", history\n",
"\n",
"    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(\n",
"        chat, inputs=[chatbot], outputs=[chatbot]\n",
"    )\n",
"\n",
"    audio_input.change(handle_audio, inputs=[audio_input, chatbot], outputs=[entry, chatbot]).then(\n",
"        chat, inputs=[chatbot], outputs=[chatbot]\n",
"    )\n",
"\n",
"    clear.click(lambda: [], inputs=None, outputs=chatbot, queue=False)"
]
},
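{
"cell_type": "code",
"execution_count": null,
"id": "chatinterface-comparison-sketch",
"metadata": {},
"outputs": [],
"source": [
"# For comparison only (not part of the original notebook): a minimal sketch of the\n",
"# preset gr.ChatInterface mentioned in the comment above, text-only and without the\n",
"# audio features. `chat_fn` is a hypothetical helper introduced here for illustration.\n",
"\n",
"def chat_fn(message, history):\n",
"    messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"    response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
"    return response.choices[0].message.content\n",
"\n",
"# Uncomment to try it instead of the Blocks UI defined above:\n",
"# gr.ChatInterface(fn=chat_fn, type=\"messages\").launch()"
]
},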
{
"cell_type": "code",
"execution_count": null,
"id": "73e0a776-d43e-4b04-a37f-a27d3714cf47",
"metadata": {},
"outputs": [],
"source": [
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcd45503-d314-4b28-a41c-4dbb87059188",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}