Simplified based on latest Gradio, and added PC fix for audio playback

2024-10-29 20:44:22 -04:00
parent d752c86cfa
commit 86763f2fcb
3 changed files with 156 additions and 89 deletions
--- a/week2/day5.ipynb
+++ b/week2/day5.ipynb
@@ -60,16 +60,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "# This function looks rather simpler than the one from my video, because we're taking advantage of the latest Gradio updates\n",
+    "\n",
    "def chat(message, history):\n",
-    "    messages = [{\"role\": \"system\", \"content\": system_message}]\n",
-    "    for human, assistant in history:\n",
-    "        messages.append({\"role\": \"user\", \"content\": human})\n",
-    "        messages.append({\"role\": \"assistant\", \"content\": assistant})\n",
-    "    messages.append({\"role\": \"user\", \"content\": message})\n",
+    "    messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
    "    response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
    "    return response.choices[0].message.content\n",
    "\n",
-    "gr.ChatInterface(fn=chat).launch()"
+    "gr.ChatInterface(fn=chat, type=\"messages\").launch()"
   ]
  },
  {
@@ -175,11 +173,7 @@
   "outputs": [],
   "source": [
    "def chat(message, history):\n",
-    "    messages = [{\"role\": \"system\", \"content\": system_message}]\n",
-    "    for human, assistant in history:\n",
-    "        messages.append({\"role\": \"user\", \"content\": human})\n",
-    "        messages.append({\"role\": \"assistant\", \"content\": assistant})\n",
-    "    messages.append({\"role\": \"user\", \"content\": message})\n",
+    "    messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
    "    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
    "\n",
    "    if response.choices[0].finish_reason==\"tool_calls\":\n",
@@ -221,7 +215,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "gr.ChatInterface(fn=chat).launch()"
+    "gr.ChatInterface(fn=chat, type=\"messages\").launch()"
   ]
  },
  {
@@ -333,6 +327,18 @@
    "Message me or email me at ed@edwarddonner.com with any problems!"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "d91d3f8f-e505-4e3c-a87c-9e42ed823db6",
+   "metadata": {},
+   "source": [
+    "# For Mac users\n",
+    "\n",
+    "This version should work fine for you. It might work for Windows users too, but you might get a permissions error when writing to a temp file. If so, use the code in the \"For Windows users\" section below instead!\n",
+    "\n",
+    "As always, if you have problems, please contact me! (You could also comment out the call to talker() in the later code if you're less interested in audio generation.)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -365,6 +371,67 @@
    "talker(\"Well, hi there\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "ad89a9bd-bb1e-4bbb-a49a-83af5f500c24",
+   "metadata": {},
+   "source": [
+    "# For Windows users\n",
+    "\n",
+    "## If you get a permissions error writing to a temp file, this code should work instead.\n",
+    "\n",
+    "A collaboration between student Mark M. and Claude got this resolved!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d59c8ebd-79c5-498a-bdf2-3a1c50d91aa0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tempfile\n",
+    "import subprocess\n",
+    "\n",
+    "def play_audio(audio_segment):\n",
+    "    \"\"\"Play audio_segment through ffplay, returning once ffplay exits.\n",
+    "\n",
+    "    The audio is exported to a uniquely named temp WAV file (removed afterwards),\n",
+    "    so overlapping calls never overwrite or delete each other's file.\n",
+    "    \"\"\"\n",
+    "    fd, temp_path = tempfile.mkstemp(suffix=\".wav\")\n",
+    "    os.close(fd)  # only the path is needed: export() and ffplay open the file themselves\n",
+    "    try:\n",
+    "        audio_segment.export(temp_path, format=\"wav\")\n",
+    "        command = [\"ffplay\", \"-nodisp\", \"-autoexit\", \"-hide_banner\", temp_path]\n",
+    "        subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n",
+    "    finally:\n",
+    "        try:\n",
+    "            os.remove(temp_path)\n",
+    "        except OSError:\n",
+    "            pass  # best-effort cleanup; a leftover temp file is harmless\n",
+    " \n",
+    "def talker(message):\n",
+    "    \"\"\"Convert message to speech with OpenAI's tts-1 model and play it via play_audio.\"\"\"\n",
+    "    speech = openai.audio.speech.create(\n",
+    "        input=message,\n",
+    "        model=\"tts-1\",\n",
+    "        voice=\"onyx\",  # Also, try replacing onyx with alloy\n",
+    "    )\n",
+    "    mp3_bytes = BytesIO(speech.content)\n",
+    "    play_audio(AudioSegment.from_file(mp3_bytes, format=\"mp3\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0dfb8ee9-e7dd-4615-8d69-2deb3fd44473",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "talker(\"Well hi there\")"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "1d48876d-c4fa-46a8-a04f-f9fadf61fb0d",
@@ -390,26 +457,23 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "def chat(message, history):\n",
+    "def chat(history):\n",
+    "    messages = [{\"role\": \"system\", \"content\": system_message}] + history\n",
+    "    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)\n",
    "    image = None\n",
-    "    conversation = [{\"role\": \"system\", \"content\": system_message}]\n",
-    "    for human, assistant in history:\n",
-    "        conversation.append({\"role\": \"user\", \"content\": human})\n",
-    "        conversation.append({\"role\": \"assistant\", \"content\": assistant})\n",
-    "    conversation.append({\"role\": \"user\", \"content\": message})\n",
-    "    response = openai.chat.completions.create(model=MODEL, messages=conversation, tools=tools)\n",
-    "\n",
+    "    \n",
    "    if response.choices[0].finish_reason==\"tool_calls\":\n",
-    "        message = tool_call = response.choices[0].message\n",
+    "        message = response.choices[0].message\n",
    "        response, city = handle_tool_call(message)\n",
-    "        conversation.append(message)\n",
-    "        conversation.append(response)\n",
+    "        messages.append(message)\n",
+    "        messages.append(response)\n",
    "        image = artist(city)\n",
-    "        response = openai.chat.completions.create(model=MODEL, messages=conversation)\n",
-    "\n",
+    "        response = openai.chat.completions.create(model=MODEL, messages=messages)\n",
+    "        \n",
    "    reply = response.choices[0].message.content\n",
+    "    history += [{\"role\":\"assistant\", \"content\":reply}]\n",
    "    talker(reply)\n",
-    "    return reply, image"
+    "    return history, image"
   ]
  },
  {
@@ -419,41 +483,43 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# More involved Gradio code as we're not using the preset Chat interface\n",
+    "# More involved Gradio code as we're not using the preset Chat interface!\n",
+    "# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
    "\n",
    "with gr.Blocks() as ui:\n",
    "    with gr.Row():\n",
-    "        chatbot = gr.Chatbot(height=500)\n",
+    "        chatbot = gr.Chatbot(height=500, type=\"messages\")\n",
    "        image_output = gr.Image(height=500)\n",
    "    with gr.Row():\n",
-    "        msg = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
+    "        entry = gr.Textbox(label=\"Chat with our AI Assistant:\")\n",
    "    with gr.Row():\n",
    "        clear = gr.Button(\"Clear\")\n",
    "\n",
-    "    def user(user_message, history):\n",
-    "        return \"\", history + [[user_message, None]]\n",
+    "    def do_entry(message, history):\n",
+    "        history += [{\"role\":\"user\", \"content\":message}]\n",
+    "        return \"\", history\n",
    "\n",
-    "    def bot(history):\n",
-    "        user_message = history[-1][0]\n",
-    "        bot_message, image = chat(user_message, history[:-1])\n",
-    "        history[-1][1] = bot_message\n",
-    "        return history, image\n",
-    "\n",
-    "    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n",
-    "        bot, chatbot, [chatbot, image_output]\n",
+    "    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(\n",
+    "        chat, inputs=chatbot, outputs=[chatbot, image_output]\n",
    "    )\n",
-    "    clear.click(lambda: None, None, chatbot, queue=False)\n",
+    "    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)\n",
    "\n",
-    "ui.launch()"
+    "ui.launch(inbrowser=True)"
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b0b12548-951d-4e7c-8e77-803a92271855",
+   "cell_type": "markdown",
+   "id": "226643d2-73e4-4252-935d-86b8019e278a",
   "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "# Business Applications\n",
+    "\n",
+    "Add in more tools - perhaps to simulate actually booking a flight. A student has done this and provided their example in the community contributions folder.\n",
+    "\n",
+    "Next: take this and apply it to your business. Make a multi-modal AI assistant with tools that could carry out an activity for your work. A customer support assistant? New employee onboarding assistant? So many possibilities!\n",
+    "\n",
+    "If you feel bold, see if you can add audio input to our assistant so you can talk to it. "
+   ]
  }
 ],
 "metadata": {