assignment: week2 exercise final
Combining streaming and tools, I built a coding tutor AI that can take voice input and generate an image when asked about calculations.
week2/community-contributions/week2 EXERCISE Lythmass.ipynb (new file, 324 lines)
@@ -0,0 +1,324 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
"metadata": {},
"source": [
"# Additional End of Week Exercise - Week 2\n",
"\n",
"Now use everything you've learned from Week 2 to build a full prototype for the technical question answerer you built in the Week 1 exercise.\n",
"\n",
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
"\n",
"If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
"\n",
"I will publish a full solution here soon - unless someone beats me to it...\n",
"\n",
"There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!). I can't wait to see your results."
]
},
{
"cell_type": "markdown",
"id": "87f483d5-dc85-41d1-bb34-5b49c6eeb30c",
"metadata": {},
"source": [
"**I built a coding expert tutor with two models: Gemini and GPT.\n",
"It works with streaming and tools simultaneously.\n",
"If a user asks a mathematical question, DALL·E 3 will generate an image of that equation.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a07e7793-b8f5-44f4-aded-5562f633271a",
"metadata": {},
"outputs": [],
"source": [
"import gradio\n",
"from openai import OpenAI\n",
"import os\n",
"from dotenv import load_dotenv\n",
"import math\n",
"import json\n",
"import base64\n",
"from io import BytesIO\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "436819d1-8a09-43e2-9429-35189cc92317",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
"\n",
"if openai_api_key:\n",
"    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
"    print(\"OpenAI API Key not set\")\n",
"\n",
"if google_api_key:\n",
"    print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
"else:\n",
"    print(\"Google API Key not set\")\n",
"\n",
"GPT_MODEL = \"gpt-5-nano\"\n",
"GEMINI_MODEL = \"gemini-2.5-flash\"\n",
"openai = OpenAI()\n",
"gemini = OpenAI(\n",
"    api_key=google_api_key,\n",
"    base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e154015c-0c16-41a5-9518-163a9ae3ea0c",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are an expert coding tutor.\\n\" \\\n",
"\"You explain the answers in a friendly and easy-to-understand way.\\n\" \\\n",
"\"However, if the input from the user feels too vague, ask them to provide more details before answering.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "937dc916-fc0b-47a4-b963-4d689cec4f60",
"metadata": {},
"outputs": [],
"source": [
"def calculate_math(math_equation):\n",
"    print(\"Math calculator tool has been run...\")\n",
"\n",
"    # Restrict eval to the math module's functions and constants (no builtins),\n",
"    # so expressions like sqrt(25) can be evaluated but arbitrary code cannot.\n",
"    allowed = {\"__builtins__\": None}\n",
"    allowed.update({k: getattr(math, k) for k in dir(math) if not k.startswith(\"_\")})\n",
"\n",
"    result = eval(math_equation, allowed, {})\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37a74256-fbf6-4539-8481-87bf73abefd4",
"metadata": {},
"outputs": [],
"source": [
"calculate_math(\"sqrt(25)\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c858d63d-c90f-4ab9-bf03-2047622ed151",
"metadata": {},
"outputs": [],
"source": [
"calculate_math_function = {\n",
"    \"name\": \"calculate_math\",\n",
"    \"description\": \"Calculate math requested by the user. You should run this tool when a user asks to know the result of ANY equation. For example: 'What is the result of this: sqrt(25)?'\",\n",
"    \"parameters\": {\n",
"        \"type\": \"object\",\n",
"        \"properties\": {\n",
"            \"math_equation\": {\n",
"                \"type\": \"string\",\n",
"                \"description\": \"The math question the user wants to calculate. You should pass only the math equation, not text. For example: sqrt(25)\",\n",
"            },\n",
"        },\n",
"        \"required\": [\"math_equation\"],\n",
"        \"additionalProperties\": False\n",
"    }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c32ef1f-909c-4646-b39f-006d26a44d10",
"metadata": {},
"outputs": [],
"source": [
"tools = [{\"type\": \"function\", \"function\": calculate_math_function}]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "edcea23f-769c-4d40-b07c-ac2fc89d2af9",
"metadata": {},
"outputs": [],
"source": [
"def generate_math_result_image(equation, result):\n",
"    # Ask DALL-E 3 for an illustration of the solved equation and return it as a PIL image\n",
"    image_response = openai.images.generate(\n",
"        model=\"dall-e-3\",\n",
"        prompt=f\"Generate a realistic image of the math equation '{equation} = {result}' written on a school chalkboard.\",\n",
"        size=\"1024x1024\",\n",
"        n=1,\n",
"        response_format=\"b64_json\",\n",
"    )\n",
"    image_base64 = image_response.data[0].b64_json\n",
"    image_data = base64.b64decode(image_base64)\n",
"    return Image.open(BytesIO(image_data))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea0fa17b-069e-4080-9cfc-a0674a2bcca6",
"metadata": {},
"outputs": [],
"source": [
"def chat(history, model=\"GPT\"):\n",
"    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
"    if model == \"GPT\":\n",
"        response = openai.chat.completions.create(model=GPT_MODEL, messages=messages, stream=True, tools=tools)\n",
"    else:\n",
"        response = gemini.chat.completions.create(model=GEMINI_MODEL, messages=messages, stream=True, tools=tools)\n",
"\n",
"    buffer = {\"role\": \"assistant\", \"content\": \"\", \"tool_calls\": []}\n",
"    tool_answer = \"\"\n",
"    image = None\n",
"\n",
"    for chunk in response:\n",
"        delta = chunk.choices[0].delta\n",
"\n",
"        # Stream ordinary assistant text to the UI as it arrives\n",
"        if delta.content:\n",
"            buffer[\"content\"] += delta.content or \"\"\n",
"            yield history + [buffer], image\n",
"\n",
"        # Accumulate streamed tool-call fragments into a single tool call\n",
"        if delta.tool_calls:\n",
"            for call in delta.tool_calls:\n",
"                if call.function is None:\n",
"                    continue\n",
"                if call.function.name:\n",
"                    # The first fragment carries the function name (and possibly the start of the arguments)\n",
"                    buffer[\"tool_calls\"].append(call)\n",
"                elif call.function.arguments and model == \"GPT\":\n",
"                    # Later fragments only carry argument text; append it to the first fragment\n",
"                    buffer[\"tool_calls\"][0].function.arguments += call.function.arguments\n",
"\n",
"        # When the model finishes its tool call, run the tool, generate the image,\n",
"        # and stream a follow-up answer that uses the tool result\n",
"        if chunk.choices[0].finish_reason == \"tool_calls\":\n",
"            tool_call = buffer[\"tool_calls\"][0]\n",
"            tool_response, result, math_equation = handle_calculate_tool_call(tool_call)\n",
"            messages.append(buffer)\n",
"            messages.append(tool_response)\n",
"            image = generate_math_result_image(math_equation, result)\n",
"            if model == \"GPT\":\n",
"                next_response = openai.chat.completions.create(model=GPT_MODEL, messages=messages, stream=True)\n",
"            else:\n",
"                next_response = gemini.chat.completions.create(model=GEMINI_MODEL, messages=messages, stream=True)\n",
"            for next_chunk in next_response:\n",
"                tool_answer += next_chunk.choices[0].delta.content or \"\"\n",
"                yield history + [{\"role\": \"assistant\", \"content\": tool_answer}], image"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5605e90c-1ccb-4222-b15e-9be35fd58168",
"metadata": {},
"outputs": [],
"source": [
"def handle_calculate_tool_call(tool_call):\n",
"    # Run the calculator tool and package its result as a 'tool' message for the follow-up call\n",
"    arguments = json.loads(tool_call.function.arguments)\n",
"    math_equation = arguments.get('math_equation')\n",
"    result = calculate_math(math_equation)\n",
"    response = {\n",
"        \"role\": \"tool\",\n",
"        \"content\": json.dumps({\"math_equation\": math_equation, \"result\": result}),\n",
"        \"tool_call_id\": tool_call.id\n",
"    }\n",
"    return response, result, math_equation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89da6939-f38f-4584-9413-85ff843d9b32",
"metadata": {},
"outputs": [],
"source": [
"def transcribe(audio_file):\n",
"    # Turn a recorded audio file into text so it can be dropped into the chat textbox\n",
"    if audio_file is None:\n",
"        return \"\"\n",
"    with open(audio_file, \"rb\") as f:\n",
"        transcription = openai.audio.transcriptions.create(\n",
"            model=\"gpt-4o-mini-transcribe\",\n",
"            file=f\n",
"        )\n",
"    return transcription.text"
]
},
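{
"cell_type": "markdown",
"id": "b2f1c6de-0a1b-4c2d-9e3f-5a6b7c8d9e0f",
"metadata": {},
"source": [
"The exercise also suggests having the tutor respond with audio. The notebook handles audio *input* (via `transcribe`) but not audio output, so here is a minimal, optional sketch using OpenAI's text-to-speech endpoint. The model name `tts-1` and the `alloy` voice are assumptions, and this helper is not wired into the Gradio UI below; you could call it on the final assistant reply and play the returned bytes in a `gradio.Audio` component."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3d4e5f6-1a2b-4c3d-8e9f-0a1b2c3d4e5f",
"metadata": {},
"outputs": [],
"source": [
"def speak(text):\n",
"    # Minimal sketch: convert the assistant's reply to speech with OpenAI TTS.\n",
"    # 'tts-1' and the 'alloy' voice are assumptions, not part of the original notebook.\n",
"    speech = openai.audio.speech.create(\n",
"        model=\"tts-1\",\n",
"        voice=\"alloy\",\n",
"        input=text\n",
"    )\n",
"    return speech.content  # raw MP3 bytes; write to a temp file to play them in a gradio.Audio component"
]
},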
{
"cell_type": "code",
"execution_count": null,
"id": "6b9ba370-6014-4f66-8f57-824465b7fe41",
"metadata": {},
"outputs": [],
"source": [
"with gradio.Blocks() as ui:\n",
"    with gradio.Row():\n",
"        chatbot = gradio.Chatbot(height=500, type=\"messages\")\n",
"        image_output = gradio.Image(height=500)\n",
"    with gradio.Row():\n",
"        entry = gradio.Textbox(label=\"Chat with our code expert:\")\n",
"        microphone = gradio.Audio(sources=\"microphone\", type=\"filepath\")\n",
"    with gradio.Row():\n",
"        ai_model = gradio.Dropdown([\"GPT\", \"Gemini\"], value=\"GPT\", label=\"Select Model\")\n",
"        clear = gradio.Button(\"Clear\")\n",
"\n",
"    def do_entry(message, history):\n",
"        # Append the user's message to the history, then clear the textbox and the microphone\n",
"        history += [{\"role\": \"user\", \"content\": message}]\n",
"        return \"\", history, None\n",
"\n",
"    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot, microphone]).then(\n",
"        chat, inputs=[chatbot, ai_model], outputs=[chatbot, image_output]\n",
"    )\n",
"    # Transcribe a finished recording straight into the chat textbox\n",
"    microphone.change(\n",
"        transcribe,\n",
"        inputs=[microphone],\n",
"        outputs=[entry]\n",
"    )\n",
"    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
"\n",
"ui.launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53abd8ac-a7de-42d1-91bf-741a93e2347b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}