From ed2fa6409414bc6f8aebae77ed4753195865c57f Mon Sep 17 00:00:00 2001
From: Lythmass
Date: Tue, 9 Sep 2025 16:11:41 +0400
Subject: [PATCH] assignment: week2 exercise final

Combining streaming and tools, I built a coding tutor AI that can take
voice input and generate an image if asked about calculations.
---
 .../week2 EXERCISE Lythmass.ipynb             | 324 ++++++++++++++++++
 1 file changed, 324 insertions(+)
 create mode 100644 week2/community-contributions/week2 EXERCISE Lythmass.ipynb

diff --git a/week2/community-contributions/week2 EXERCISE Lythmass.ipynb b/week2/community-contributions/week2 EXERCISE Lythmass.ipynb
new file mode 100644
index 0000000..5aa7b2d
--- /dev/null
+++ b/week2/community-contributions/week2 EXERCISE Lythmass.ipynb
@@ -0,0 +1,324 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
+   "metadata": {},
+   "source": [
+    "# Additional End of week Exercise - week 2\n",
+    "\n",
+    "Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
+    "\n",
+    "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
+    "\n",
+    "If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
+    "\n",
+    "I will publish a full solution here soon - unless someone beats me to it...\n",
+    "\n",
+    "There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "87f483d5-dc85-41d1-bb34-5b49c6eeb30c",
+   "metadata": {},
+   "source": [
+    "**I built an expert coding tutor with two models: Gemini and GPT.\n",
+    "It works with streaming and tools simultaneously.\n",
+    "If a user asks a mathematical question, DALL·E 3 will generate an image of that equation.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gradio\n",
+    "from openai import OpenAI\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "import math\n",
+    "import json\n",
+    "import base64\n",
+    "from io import BytesIO\n",
+    "from PIL import Image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "436819d1-8a09-43e2-9429-35189cc92317",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_dotenv(override=True)\n",
+    "\n",
+    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+    "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+    "\n",
+    "if openai_api_key:\n",
+    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+    "else:\n",
+    "    print(\"OpenAI API Key not set\")\n",
+    "    \n",
+    "if google_api_key:\n",
+    "    print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+    "else:\n",
+    "    print(\"Google API Key not set\")\n",
+    "    \n",
+    "    \n",
+    "GPT_MODEL = \"gpt-5-nano\"\n",
+    "GEMINI_MODEL = \"gemini-2.5-flash\"\n",
+    "openai = OpenAI()\n",
+    "gemini = OpenAI(\n",
+    "    api_key=google_api_key, \n",
+    "    base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
+    ")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e154015c-0c16-41a5-9518-163a9ae3ea0c",
+   "metadata": {},
+   "outputs": [],
"source": [ + "system_message = \"You are an expert coding tutor. \\n\" \\\n", + "\"You explain the answers in a friendly and easy to understand way.\\n\" \\\n", + "\"However, if the input from the user feels too vague, ask them to provide more details before answering.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "937dc916-fc0b-47a4-b963-4d689cec4f60", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_math(math_equation):\n", + " print(\"Math calculator tool has been run...\")\n", + " \n", + " allowed = {\"__builtins__\": None}\n", + " allowed.update({k: getattr(math, k) for k in dir(math) if not k.startswith(\"_\")})\n", + " \n", + " result = eval(math_equation, allowed, {})\n", + " return result\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37a74256-fbf6-4539-8481-87bf73abefd4", + "metadata": {}, + "outputs": [], + "source": [ + "calculate_math(\"sqrt(25)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c858d63d-c90f-4ab9-bf03-2047622ed151", + "metadata": {}, + "outputs": [], + "source": [ + "calculate_math_function = {\n", + " \"name\": \"calculate_math\",\n", + " \"description\": \"Calculate math requested by the user. You should run this tool when a user asks to know the result of ANY equation. For example: 'What is ther result of this: sqrt(25)'\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"math_equation\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The math question the user wants to calculate. You should pass only the math equation, not text. For example: sqrt(25)\",\n", + " },\n", + " },\n", + " \"required\": [\"math_equation\"],\n", + " \"additionalProperties\": False\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c32ef1f-909c-4646-b39f-006d26a44d10", + "metadata": {}, + "outputs": [], + "source": [ + "tools = [{\"type\": \"function\", \"function\": calculate_math_function}]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edcea23f-769c-4d40-b07c-ac2fc89d2af9", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_math_result_image(equation, result):\n", + " image_response = openai.images.generate(\n", + " model=\"dall-e-3\",\n", + " prompt=f\"Generate a realistic image of a math equation: '{equation}={result}' on a school chalk board with.\",\n", + " size=\"1024x1024\",\n", + " n=1,\n", + " response_format=\"b64_json\",\n", + " )\n", + " image_base64 = image_response.data[0].b64_json\n", + " image_data = base64.b64decode(image_base64)\n", + " return Image.open(BytesIO(image_data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea0fa17b-069e-4080-9cfc-a0674a2bcca6", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(history, model=\"GPT\"):\n", + " messages = [{\"role\": \"system\", \"content\": system_message}] + history\n", + " if model == \"GPT\": \n", + " response = openai.chat.completions.create(model=GPT_MODEL, messages=messages, stream=True, tools=tools)\n", + " else:\n", + " response = gemini.chat.completions.create(model=GEMINI_MODEL, messages=messages, stream=True, tools=tools)\n", + " \n", + " buffer = {\"role\": \"assistant\", \"content\": \"\", \"tool_calls\": []}\n", + " tool_answer = \"\"\n", + " image = None\n", + " \n", + " for chunk in response:\n", + " delta = chunk.choices[0].delta\n", + " if delta.content:\n", + " buffer[\"content\"] += delta.content or \"\"\n", + " yield 
+    "\n",
+    "        if delta.tool_calls:\n",
+    "            if delta.tool_calls[0].function.name:  # the first streamed chunk of a tool call carries its name\n",
+    "                buffer[\"tool_calls\"].append(delta.tool_calls[0])\n",
+    "            for call in delta.tool_calls:\n",
+    "                if call.function and model == \"GPT\":\n",
+    "                    buffer[\"tool_calls\"][0].function.arguments += call.function.arguments  # accumulate streamed argument fragments\n",
+    "    \n",
+    "        if chunk.choices[0].finish_reason == \"tool_calls\":  # run the tool, then stream a follow-up answer\n",
+    "            tool_call = buffer[\"tool_calls\"][0]\n",
+    "            response, result, math_equation = handle_calculate_tool_call(tool_call)\n",
+    "            messages.append(buffer)\n",
+    "            messages.append(response)\n",
+    "            image = generate_math_result_image(math_equation, result)\n",
+    "            if model == \"GPT\": \n",
+    "                next_response = openai.chat.completions.create(model=GPT_MODEL, messages=messages, stream=True)\n",
+    "            else:\n",
+    "                next_response = gemini.chat.completions.create(model=GEMINI_MODEL, messages=messages, stream=True)\n",
+    "            for next_chunk in next_response:\n",
+    "                tool_answer += next_chunk.choices[0].delta.content or \"\"\n",
+    "                yield history + [{\"role\": \"assistant\", \"content\": tool_answer}], image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5605e90c-1ccb-4222-b15e-9be35fd58168",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def handle_calculate_tool_call(tool_call):\n",
+    "    arguments = json.loads(tool_call.function.arguments)\n",
+    "    math_equation = arguments.get('math_equation')\n",
+    "    result = calculate_math(math_equation)\n",
+    "    response = {\n",
+    "        \"role\": \"tool\",\n",
+    "        \"content\": json.dumps({\"math_equation\": math_equation, \"result\": result}),\n",
+    "        \"tool_call_id\": tool_call.id\n",
+    "    }\n",
+    "    return response, result, math_equation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89da6939-f38f-4584-9413-85ff843d9b32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def transcribe(audio_file):\n",
+    "    if audio_file is None:\n",
+    "        return \"\"\n",
+    "    with open(audio_file, \"rb\") as f:\n",
+    "        transcription = openai.audio.transcriptions.create(\n",
+    "            model=\"gpt-4o-mini-transcribe\", \n",
+    "            file=f\n",
+    "        )\n",
+    "    return transcription.text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b9ba370-6014-4f66-8f57-824465b7fe41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with gradio.Blocks() as ui:\n",
+    "    with gradio.Row():\n",
+    "        chatbot = gradio.Chatbot(height=500, type=\"messages\")\n",
+    "        image_output = gradio.Image(height=500)\n",
+    "    with gradio.Row():\n",
+    "        entry = gradio.Textbox(label=\"Chat with our code expert:\")\n",
+    "        microphone = gradio.Audio(sources=\"microphone\", type=\"filepath\")\n",
+    "    with gradio.Row():\n",
+    "        ai_model = gradio.Dropdown([\"GPT\", \"Gemini\"], label=\"Select Model\")\n",
+    "        clear = gradio.Button(\"Clear\")\n",
+    "\n",
+    "    def do_entry(message, history):\n",
+    "        history += [{\"role\":\"user\", \"content\":message}]\n",
+    "        return \"\", history, None\n",
+    "\n",
+    "    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot, microphone]).then(\n",
+    "        chat, inputs=[chatbot, ai_model], outputs=[chatbot, image_output]\n",
+    "    )\n",
+    "    microphone.change(\n",
+    "        transcribe,\n",
+    "        inputs=[microphone],\n",
+    "        outputs=[entry] \n",
+    "    )\n",
+    "    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
+    "\n",
+    "ui.launch()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53abd8ac-a7de-42d1-91bf-741a93e2347b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}