Added my contributions to community-contributions
227
week2/community-contributions/multi-modal-StudyAI.ipynb
Normal file
@@ -0,0 +1,227 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6aa646e3-7a57-461a-b69a-073179effa18",
"metadata": {},
"source": [
"## Additional End of Week Exercise - Week 2\n",
"\n",
"This includes:\n",
"- Gradio UI\n",
"- use of the system prompt to add expertise\n",
"- audio input so you can talk to it\n",
"- audio responses so it can talk back"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72f3dca4-b052-4e9f-90c8-f42e667c165c",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display, update_display\n",
"import gradio as gr\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23570b9f-8c7a-4cc7-b809-3505334b60a7",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables from a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"openai = OpenAI()\n",
"MODEL = 'gpt-4o-mini'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d379178a-8672-4e6f-a380-ad8d85f5c64e",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"\"\"You are a personal study tutor, designed to provide clear, brief and succinct answers to\n",
"students who ask you questions. The topics are related to data science, computer science\n",
"and technology in general, so you are allowed to use a moderate level of jargon. Explain in\n",
"simple terminology, so a student can easily understand.\n",
"\n",
"You may also be asked about prices for special courses. In this case, respond that you have no such\n",
"data available.\n",
"\n",
"\"\"\"\n",
"# Use a tabular format where possible\n",
"# for ease of information flow"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4745d439-c66e-4e5c-b5d4-9f0ba97aefdc",
"metadata": {},
"outputs": [],
"source": [
"def chat(history):\n",
"    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
"    response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
"\n",
"    reply = response.choices[0].message.content\n",
"    history += [{\"role\": \"assistant\", \"content\": reply}]\n",
"\n",
"    # talker() is defined in a later cell, so run that cell first.\n",
"    # Comment out or delete the next line if you'd rather skip audio for now.\n",
"    talker(reply)\n",
"\n",
"    return history"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8b31799-df86-4151-98ea-66ef50fe767e",
"metadata": {},
"outputs": [],
"source": [
"# Install openai-whisper for local speech-to-text transcription\n",
"# (note: whisper also needs ffmpeg available on your system to decode audio)\n",
"\n",
"!pip install openai-whisper"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f5b8e51-2833-44be-a4f4-63c4683f2b6e",
"metadata": {},
"outputs": [],
"source": [
"import whisper\n",
"\n",
"def transcribe_audio(audio):\n",
"    if audio is None:\n",
"        return \"No audio received.\"\n",
"\n",
"    model = whisper.load_model(\"base\")  # You can use \"tiny\", \"small\", etc.\n",
"    result = model.transcribe(audio)\n",
"\n",
"    return result[\"text\"]"
]
},
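{
"cell_type": "code",
"execution_count": null,
"id": "whisper-api-alternative-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional alternative (not part of the original notebook): a minimal sketch of\n",
"# transcribing with OpenAI's hosted Whisper API instead of the local model above.\n",
"# It reuses the `openai` client defined earlier; `transcribe_audio_api` is a\n",
"# hypothetical helper name introduced here for illustration.\n",
"\n",
"def transcribe_audio_api(audio):\n",
"    if audio is None:\n",
"        return \"No audio received.\"\n",
"\n",
"    with open(audio, \"rb\") as audio_file:\n",
"        transcript = openai.audio.transcriptions.create(model=\"whisper-1\", file=audio_file)\n",
"\n",
"    return transcript.text"
]
},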
{
"cell_type": "code",
"execution_count": null,
"id": "e55f8e43-2da1-4f2a-bcd4-3fffa830db48",
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"from io import BytesIO\n",
"from PIL import Image\n",
"from IPython.display import Audio, display\n",
"\n",
"def talker(message):\n",
"    response = openai.audio.speech.create(\n",
"        model=\"tts-1\",\n",
"        voice=\"onyx\",\n",
"        input=message)\n",
"\n",
"    audio_stream = BytesIO(response.content)\n",
"    output_filename = \"output_audio.mp3\"\n",
"    with open(output_filename, \"wb\") as f:\n",
"        f.write(audio_stream.read())\n",
"\n",
"    # Play the generated audio\n",
"    display(Audio(output_filename, autoplay=True))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb3107a7-bfdc-4255-825f-bfabcf458c0c",
"metadata": {},
"outputs": [],
"source": [
"# More involved Gradio code as we're not using the preset Chat interface!\n",
"# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
"\n",
"with gr.Blocks() as ui:\n",
"    with gr.Row():\n",
"        chatbot = gr.Chatbot(height=400, type=\"messages\")\n",
"    with gr.Row():\n",
"        entry = gr.Textbox(label=\"Chat with our StudyAI Assistant:\")\n",
"    # with gr.Row():\n",
"    #     entry = gr.Textbox(label=\"Speak or Type:\", placeholder=\"Speak your question...\", interactive=True, microphone=True)\n",
"    with gr.Row():\n",
"        audio_input = gr.Audio(type=\"filepath\", label=\"Speak your question\")\n",
"    with gr.Row():\n",
"        clear = gr.Button(\"Clear\")\n",
"\n",
"    def do_entry(message, history):\n",
"        history += [{\"role\": \"user\", \"content\": message}]\n",
"        return \"\", history\n",
"\n",
"    def handle_audio(audio, history):\n",
"        text = transcribe_audio(audio)\n",
"        history += [{\"role\": \"user\", \"content\": text}]\n",
"        return \"\", history\n",
"\n",
"    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(\n",
"        chat, inputs=[chatbot], outputs=[chatbot]\n",
"    )\n",
"\n",
"    audio_input.change(handle_audio, inputs=[audio_input, chatbot], outputs=[entry, chatbot]).then(\n",
"        chat, inputs=[chatbot], outputs=[chatbot]\n",
"    )\n",
"\n",
"    clear.click(lambda: [], inputs=None, outputs=chatbot, queue=False)"
]
},
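{
"cell_type": "code",
"execution_count": null,
"id": "chatinterface-comparison-sketch",
"metadata": {},
"outputs": [],
"source": [
"# For comparison only (not part of the original notebook): a minimal sketch of the\n",
"# preset gr.ChatInterface mentioned in the comment above, text-only and without the\n",
"# audio features. `chat_fn` is a hypothetical helper introduced here for illustration.\n",
"\n",
"def chat_fn(message, history):\n",
"    messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"    response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
"    return response.choices[0].message.content\n",
"\n",
"# Uncomment to try it instead of the Blocks UI defined above:\n",
"# gr.ChatInterface(fn=chat_fn, type=\"messages\").launch()"
]
},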
{
"cell_type": "code",
"execution_count": null,
"id": "73e0a776-d43e-4b04-a37f-a27d3714cf47",
"metadata": {},
"outputs": [],
"source": [
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcd45503-d314-4b28-a41c-4dbb87059188",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}