Files
LLM_Engineering_OLD/week2/community-contributions/rwothoromo/week2 EXERCISE.ipynb
Elijah Rwothoromo a605573de8 Get Gemini to work
2025-08-11 13:48:02 +03:00

623 lines
24 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
"metadata": {},
"source": [
"# Additional End of week Exercise - week 2\n",
"\n",
"Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
"\n",
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
"\n",
"If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
"\n",
"I will publish a full solution here soon - unless someone beats me to it...\n",
"\n",
"There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b624d5b-69a2-441f-9147-fde105d3d551",
"metadata": {},
"outputs": [],
"source": [
"# sample question to use in the Gradio UI that pops up\n",
"\n",
"question = \"\"\"\n",
"How good at Software Development is Elijah Rwothoromo? \\\n",
"He has a Wordpress site https://rwothoromo.wordpress.com/. \\\n",
"He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \\\n",
"As well as a GitHub Profile https://www.github.com/rwothoromo/. \\\n",
"What can we learn from him?\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a07e7793-b8f5-44f4-aded-5562f633271a",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import re, requests, os, json, tempfile, gradio as gr, anthropic, google.generativeai, ollama\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from pydub import AudioSegment\n",
"from pydub.playback import play\n",
"from io import BytesIO\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efb88276-6d74-4d94-95a2-b8ca82a4716c",
"metadata": {},
"outputs": [],
"source": [
"# Pull the provider API keys out of the environment (.env is loaded first)\n",
"load_dotenv()\n",
"\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
"\n",
"# Report each key's status, echoing only its first eight characters\n",
"for provider_label, key_value in (\n",
"    (\"OpenAI\", openai_api_key),\n",
"    (\"Anthropic\", anthropic_api_key),\n",
"    (\"Google\", google_api_key),\n",
"):\n",
"    if not key_value:\n",
"        print(f\"{provider_label} API Key not set\")\n",
"    else:\n",
"        print(f\"{provider_label} API Key exists and begins {key_value[:8]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "484f0c3e-638d-4af7-bb9b-36faf6048f3c",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_CLAUDE = \"claude-sonnet-4-20250514\"\n",
"MODEL_GEMINI = \"gemini-2.5-flash\"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e292401-e62f-4bfc-b060-07462ad20d3d",
"metadata": {},
"outputs": [],
"source": [
"# system messages\n",
"\n",
"system_message = \"You are an expert assistant. Synthesize a comprehensive answer in markdown format.\"\n",
"system_prompt_with_url_data = \"You are an expert assistant. \\\n",
" Analyze the user's question and the provided text from relevant websites to synthesize a comprehensive answer in markdown format.\\\n",
" Provide a short summary, ignoring text that might be navigation-related.\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84252e03-ccde-4ecf-975b-78227291ca5c",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49396924-47c2-4f7d-baa2-9b0fece9da4a",
"metadata": {},
"outputs": [],
"source": [
"# Website class for URLs to be scraped\n",
"\n",
"class Website:\n",
"    \"\"\"\n",
"    A scraped web page: its URL, its title, and the visible text of its body.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, url, timeout=30):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"\n",
"        Args:\n",
"            url: Address of the page to download.\n",
"            timeout: Seconds to wait for the server before giving up.\n",
"\n",
"        Raises:\n",
"            requests.RequestException: If the request fails, times out, or the\n",
"                server answers with an HTTP error status.\n",
"        \"\"\"\n",
"        self.url = url\n",
"        # A timeout keeps one unresponsive site from hanging the whole chat turn\n",
"        response = requests.get(url, headers=headers, timeout=timeout)\n",
"        # Error pages (404, 5xx, ...) are not useful content to summarise\n",
"        response.raise_for_status()\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title and soup.title.string else \"No title found\"\n",
"        # Non-HTML or malformed responses may have no <body>; treat them as empty text\n",
"        if soup.body is None:\n",
"            self.text = \"\"\n",
"            return\n",
"        for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"            irrelevant.decompose()\n",
"        self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4d23747-d78a-4f36-9862-c00e1e8d9e44",
"metadata": {},
"outputs": [],
"source": [
"# Instantiate models with API keys from environment variables\n",
"\n",
"openai = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
"claude = anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n",
"google.generativeai.configure(api_key=os.getenv(\"GOOGLE_API_KEY\"))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67e150be-502e-4ba4-9586-3a2f3fae3830",
"metadata": {},
"outputs": [],
"source": [
"# To scrape data based on URLs in the user prompt\n",
"\n",
"def scrape_urls(text):\n",
"    \"\"\"\n",
"    Scrape every URL found in text and return the combined page contents.\n",
"\n",
"    Args:\n",
"        text: Free-form text (typically the user prompt) that may contain URLs.\n",
"\n",
"    Returns:\n",
"        A string with one delimited section per URL (or a failure note for a URL\n",
"        that could not be fetched), None when text contains no URLs, or an\n",
"        apology message if something unexpected goes wrong.\n",
"    \"\"\"\n",
"    try:\n",
"        # Extract all URLs from the text string using regular expressions\n",
"        matches = re.findall(r'https?://[^\\s)]+', text)\n",
"\n",
"        # Drop sentence punctuation glued to the end of a URL (a link that ends a\n",
"        # sentence must not keep the final full stop) and skip repeated links\n",
"        urls = []\n",
"        for match in matches:\n",
"            url = match.rstrip('.,;:!?')\n",
"            if url not in urls:\n",
"                urls.append(url)\n",
"\n",
"        if not urls:\n",
"            return None\n",
"\n",
"        scraped_content = []\n",
"        for url in urls:\n",
"            print(f\"Scraping: {url}\")\n",
"            try:\n",
"                site = Website(url)\n",
"                content = f\"Content from {url}:\\n---\\n{site.text}\\n---\\n\"\n",
"                scraped_content.append(content)\n",
"                print(\"Scraping done!\")\n",
"            except Exception as e:\n",
"                # One unreachable site should not discard what the others returned\n",
"                print(f\"Could not scrape {url}: {e}\")\n",
"                scraped_content.append(f\"Could not retrieve content from {url}.\\n\")\n",
"\n",
"        return \"\\n\".join(scraped_content)\n",
"    except Exception as e:\n",
"        print(f\"Error during website scraping: {e}\")\n",
"        return \"Sorry, I encountered an error and could not complete scraping the website(s).\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd9d0511-2f78-4270-81f8-73708388dfad",
"metadata": {},
"outputs": [],
"source": [
"# Tool definition for scrape_urls\n",
"\n",
"scraping_function = {\n",
" \"name\": \"scrape_urls\",\n",
" \"description\": \"Scrapes available URLs for data to update the User prompt. Call this whenever a customer provides a URL.\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"text\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The website URL or user prompt containing URLs.\"\n",
" }\n",
" },\n",
" \"required\": [\"text\"]\n",
" }\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47733d5b-bb0a-44dd-b56d-a54677c88f80",
"metadata": {},
"outputs": [],
"source": [
"# Instantiate the tools: one definition of scrape_urls, expressed in each provider's format\n",
"\n",
"# OpenAI (GPT) and Ollama share the same function-tool wrapper\n",
"tools_gpt_ollama = [{\"type\": \"function\", \"function\": scraping_function}]\n",
"\n",
"# Claude takes the same JSON schema under `input_schema` instead of `parameters`\n",
"tools_claude = [{\n",
"    \"name\": scraping_function[\"name\"],\n",
"    \"description\": scraping_function[\"description\"],\n",
"    \"input_schema\": scraping_function[\"parameters\"]\n",
"}]\n",
"\n",
"# Gemini tool definition must be a FunctionDeclaration object without the top-level `type` in parameters.\n",
"tools_gemini = [google.generativeai.protos.FunctionDeclaration(\n",
"    name=scraping_function[\"name\"],\n",
"    description=scraping_function[\"description\"],\n",
"    parameters=google.generativeai.protos.Schema(\n",
"        type=google.generativeai.protos.Type.OBJECT,\n",
"        properties={\n",
"            \"text\": google.generativeai.protos.Schema(\n",
"                type=google.generativeai.protos.Type.STRING,\n",
"                description=scraping_function[\"parameters\"][\"properties\"][\"text\"][\"description\"]\n",
"            )\n",
"        },\n",
"        required=scraping_function[\"parameters\"][\"required\"]\n",
"    )\n",
")]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa3fa01b-97d0-443e-b0cc-55d277878cb7",
"metadata": {},
"outputs": [],
"source": [
"# Handle multiple tools\n",
"\n",
"def _parse_tool_arguments(raw_arguments):\n",
"    \"\"\"Return tool-call arguments as a plain dict, or None if they cannot be interpreted.\"\"\"\n",
"    if raw_arguments is None:\n",
"        return None\n",
"    if isinstance(raw_arguments, str):\n",
"        if not raw_arguments.strip():\n",
"            return None\n",
"        try:\n",
"            parsed = json.loads(raw_arguments)\n",
"        except json.JSONDecodeError:\n",
"            # Not JSON: treat the raw string itself as the text to scan for URLs\n",
"            return {'text': raw_arguments}\n",
"        return parsed if isinstance(parsed, dict) else {'text': raw_arguments}\n",
"    try:\n",
"        # Already a mapping (e.g. a dict or a protobuf map): copy it into a plain dict\n",
"        return dict(raw_arguments)\n",
"    except (TypeError, ValueError):\n",
"        return None\n",
"\n",
"\n",
"def handle_tool_call(tool_call, user_message):\n",
"    \"\"\"\n",
"    Run the tool requested by a model and wrap the result as a tool message.\n",
"\n",
"    Args:\n",
"        tool_call: The provider's tool-call object. Accepted shapes are a dict with\n",
"            a 'function' entry, an object with a `.function` attribute, or an\n",
"            object exposing `.name` and `.args`.\n",
"        user_message: The user's latest message, scraped instead when the tool\n",
"            call carries no usable 'text' argument.\n",
"\n",
"    Returns:\n",
"        A dict with keys \"role\" (always \"tool\"), \"content\" (a JSON string holding\n",
"        either the scraped data or an error for an unknown tool) and\n",
"        \"tool_call_id\" (None when the tool call has no id).\n",
"    \"\"\"\n",
"    function_name = None\n",
"    raw_arguments = None\n",
"    tool_call_id = None\n",
"\n",
"    # Logic for different model tool call object formats\n",
"    if isinstance(tool_call, dict) and 'function' in tool_call:  # Ollama-style dict\n",
"        function_name = tool_call['function']['name']\n",
"        raw_arguments = tool_call['function'].get('arguments')\n",
"        tool_call_id = tool_call.get('id')\n",
"    elif hasattr(tool_call, 'function'):  # OpenAI-style object\n",
"        function_name = tool_call.function.name\n",
"        tool_call_id = getattr(tool_call, 'id', None)\n",
"        raw_arguments = tool_call.function.arguments\n",
"    elif hasattr(tool_call, 'name'):  # Gemini\n",
"        function_name = tool_call.name\n",
"        raw_arguments = tool_call.args\n",
"\n",
"    # Arguments may arrive as a JSON string or as an already-parsed mapping;\n",
"    # a mapping must be used as-is rather than pushed through json.loads\n",
"    arguments = _parse_tool_arguments(raw_arguments)\n",
"\n",
"    # Fallback if arguments are missing or carry no usable text\n",
"    text_argument = arguments.get('text') if arguments else None\n",
"    if not isinstance(text_argument, str) or not text_argument.strip():\n",
"        arguments = {'text': user_message}\n",
"\n",
"    if function_name == \"scrape_urls\":\n",
"        url_scraped_data = scrape_urls(arguments['text'])\n",
"        response_content = json.dumps({\"url_scraped_data\": url_scraped_data})\n",
"    else:\n",
"        response_content = json.dumps({\"error\": f\"Unknown tool: {function_name}\"})\n",
"\n",
"    response = {\n",
"        \"role\": \"tool\",\n",
"        \"content\": response_content,\n",
"        \"tool_call_id\": tool_call_id\n",
"    }\n",
"    return response\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14083620-1b16-4c8b-8365-c221b831e678",
"metadata": {},
"outputs": [],
"source": [
"# Audio output\n",
"\n",
"def talker(message):\n",
"    \"\"\"\n",
"    Speak message aloud: request MP3 speech from the OpenAI client and play it.\n",
"\n",
"    Args:\n",
"        message: Text to read out. Empty or whitespace-only text is skipped.\n",
"    \"\"\"\n",
"    if not message or not message.strip():\n",
"        # Nothing to say, so skip the speech request entirely\n",
"        return\n",
"\n",
"    response = openai.audio.speech.create(\n",
"        model=\"tts-1\",\n",
"        voice=\"onyx\",\n",
"        input=message\n",
"    )\n",
"\n",
"    # Decode the returned MP3 bytes in memory and play them on the local machine\n",
"    audio_stream = BytesIO(response.content)\n",
"    audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
"    play(audio)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9601a49-a490-4454-bd47-591ad793dc30",
"metadata": {},
"outputs": [],
"source": [
"# To transcribe an audio prompt/input to text\n",
"\n",
"def transcribe_audio(audio_file):\n",
"    \"\"\"\n",
"    Transcribe a recorded audio file to text with the whisper-1 model.\n",
"\n",
"    Args:\n",
"        audio_file: Path of the recording to transcribe, or None.\n",
"\n",
"    Returns:\n",
"        The transcribed text, or an empty string when audio_file is None.\n",
"    \"\"\"\n",
"    if audio_file is None:\n",
"        return \"\"\n",
"\n",
"    # Re-export into a temporary directory rather than an open NamedTemporaryFile:\n",
"    # a temp file that is still open cannot be reopened by name on Windows.\n",
"    with tempfile.TemporaryDirectory() as tmpdir:\n",
"        wav_path = os.path.join(tmpdir, \"recording.wav\")\n",
"        audio = AudioSegment.from_file(audio_file, format=\"wav\")\n",
"        # export() hands back the file object it wrote to; close it so the file\n",
"        # can be reopened for upload and removed with the directory\n",
"        audio.export(wav_path, format=\"wav\").close()\n",
"\n",
"        with open(wav_path, \"rb\") as audio_file_obj:\n",
"            transcript = openai.audio.transcriptions.create(\n",
"                model=\"whisper-1\",\n",
"                file=audio_file_obj\n",
"            )\n",
"    return transcript.text\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70c79408-f5f4-424b-b96c-d07e6893af6a",
"metadata": {},
"outputs": [],
"source": [
"# More involved Gradio code as we're not using the preset Chat interface!\n",
"# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.\n",
"\n",
"with gr.Blocks() as ui:\n",
"    with gr.Row():\n",
"        chatbot = gr.Chatbot(height=500)\n",
"    with gr.Row():\n",
"        entry = gr.Textbox(label=\"Chat with our AI Assistant:\", scale=4)\n",
"        submit_btn = gr.Button(\"Submit\", scale=1)\n",
"    with gr.Row():\n",
"        audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Speak to our AI Assistant\", scale=4)\n",
"        submit_audio_btn = gr.Button(\"Submit Audio\", scale=1)\n",
"\n",
"    with gr.Row():\n",
"        models = [\"Claude\", \"Gemini\", \"GPT\", \"Ollama\"]\n",
"        model_dropdown = gr.Dropdown(\n",
"            label=\"Select a model\",\n",
"            choices=models,\n",
"            value=models[2]\n",
"        )\n",
"\n",
"        audio_options = [\"Yes\", \"No\"]\n",
"        audio_dropdown = gr.Dropdown(\n",
"            label=\"Select whether to respond with audio\",\n",
"            choices=audio_options,\n",
"            value=audio_options[1]\n",
"        )\n",
"\n",
"    with gr.Row():\n",
"        clear = gr.Button(\"Clear\")\n",
"\n",
"    def user_message_updater(user_message, history):\n",
"        \"\"\"\n",
"        Queue the user's text as a new chat turn.\n",
"\n",
"        Returns an empty string (which clears the textbox) and the history with a\n",
"        new [user_message, None] pair appended; chat_with_assistant later fills\n",
"        in the None.\n",
"        \"\"\"\n",
"        # history can be None after the Clear button resets the chatbot\n",
"        return \"\", (history or []) + [[user_message, None]]\n",
"\n",
"    def chat_with_assistant(history, target_model, use_audio_output):\n",
"        \"\"\"\n",
"        Answer the latest user turn in history with the selected model.\n",
"\n",
"        Args:\n",
"            history: List of [user, assistant] pairs; the last pair holds the\n",
"                pending user message with None as its reply.\n",
"            target_model: \"Claude\", \"Gemini\" or \"Ollama\"; any other value uses GPT.\n",
"            use_audio_output: Anything other than \"No\" also speaks the reply via talker.\n",
"\n",
"        Returns:\n",
"            The same history list with the last reply filled in.\n",
"        \"\"\"\n",
"        if not history:\n",
"            return history\n",
"\n",
"        messages = []\n",
"        for msg_user, msg_assistant in history:\n",
"            messages.append({\"role\": \"user\", \"content\": msg_user})\n",
"            if msg_assistant:\n",
"                messages.append({\"role\": \"assistant\", \"content\": msg_assistant})\n",
"\n",
"        user_message = history[-1][0]\n",
"        final_response_content = \"\"\n",
"\n",
"        if target_model == \"Claude\":\n",
"            claude_request = {\n",
"                \"model\": MODEL_CLAUDE,\n",
"                \"max_tokens\": 200,\n",
"                \"temperature\": 0.7,\n",
"                \"system\": system_prompt_with_url_data,\n",
"                \"tools\": tools_claude,\n",
"            }\n",
"            response = claude.messages.create(messages=messages, **claude_request)\n",
"\n",
"            tool_calls = [content_block for content_block in response.content if content_block.type == \"tool_use\"]\n",
"            if tool_calls:\n",
"                # Every tool_use block needs its own tool_result in the next user turn\n",
"                tool_results = []\n",
"                for tool_use in tool_calls:\n",
"                    tool_output_content = scrape_urls(tool_use.input.get(\"text\", user_message))\n",
"                    tool_results.append({\n",
"                        \"type\": \"tool_result\",\n",
"                        \"tool_use_id\": tool_use.id,\n",
"                        # scrape_urls returns None when it finds no URLs; send text instead\n",
"                        \"content\": tool_output_content or \"No URLs were found to scrape.\"\n",
"                    })\n",
"\n",
"                messages.append({\"role\": \"assistant\", \"content\": response.content})\n",
"                messages.append({\"role\": \"user\", \"content\": tool_results})\n",
"\n",
"                # The follow-up keeps `tools` declared because the conversation now\n",
"                # contains tool_use / tool_result blocks\n",
"                response = claude.messages.create(messages=messages, **claude_request)\n",
"\n",
"            # Join the text blocks rather than assuming the first block is text\n",
"            final_response_content = \"\".join(\n",
"                content_block.text for content_block in response.content if content_block.type == \"text\"\n",
"            )\n",
"\n",
"        elif target_model == \"Gemini\":\n",
"            messages_gemini = []\n",
"            for m in history:\n",
"                messages_gemini.append({\"role\": \"user\", \"parts\": [{\"text\": m[0]}]})\n",
"                if m[1]:\n",
"                    messages_gemini.append({\"role\": \"model\", \"parts\": [{\"text\": m[1]}]})\n",
"\n",
"            model = google.generativeai.GenerativeModel(\n",
"                model_name=MODEL_GEMINI,\n",
"                system_instruction=system_message,\n",
"                tools=tools_gemini\n",
"            )\n",
"\n",
"            # Earlier turns seed the chat; the pending user turn is sent as the new message\n",
"            chat = model.start_chat(history=messages_gemini[:-1])\n",
"            response = chat.send_message(messages_gemini[-1])\n",
"\n",
"            # Check if the response is a tool call before trying to extract text\n",
"            if response.candidates[0].content.parts[0].function_call:\n",
"                tool_call = response.candidates[0].content.parts[0].function_call\n",
"                response_tool = handle_tool_call(tool_call, user_message)\n",
"\n",
"                tool_response_content = json.loads(response_tool[\"content\"])\n",
"                tool_response_gemini = {\n",
"                    \"role\": \"tool\",\n",
"                    \"parts\": [{\n",
"                        \"function_response\": {\n",
"                            \"name\": tool_call.name,\n",
"                            \"response\": tool_response_content\n",
"                        }\n",
"                    }]\n",
"                }\n",
"\n",
"                # Send the tool output back and get a new response\n",
"                response = chat.send_message(tool_response_gemini)\n",
"\n",
"            final_response_content = response.text\n",
"\n",
"        elif target_model == \"Ollama\":\n",
"            messages_ollama = [{\"role\": \"system\", \"content\": system_message}] + messages\n",
"            response = ollama.chat(\n",
"                model=MODEL_LLAMA,\n",
"                messages=messages_ollama,\n",
"                stream=False,\n",
"                tools=tools_gpt_ollama,\n",
"            )\n",
"\n",
"            if 'tool_calls' in response['message'] and response['message']['tool_calls']:\n",
"                requested_calls = response['message']['tool_calls']\n",
"                messages_ollama.append({\"role\": \"assistant\", \"content\": response['message']['content'], \"tool_calls\": requested_calls})\n",
"                # Answer every requested call, not just the first one\n",
"                for requested_call in requested_calls:\n",
"                    messages_ollama.append(handle_tool_call(requested_call, user_message))\n",
"\n",
"                response = ollama.chat(\n",
"                    model=MODEL_LLAMA,\n",
"                    messages=messages_ollama,\n",
"                    stream=False,\n",
"                )\n",
"\n",
"            # Read the reply whether or not a tool round-trip happened\n",
"            final_response_content = response['message']['content']\n",
"\n",
"        else:  # Assuming GPT is default\n",
"            messages_gpt = [{\"role\": \"system\", \"content\": system_message}] + messages\n",
"            response_stream = openai.chat.completions.create(model=MODEL_GPT, messages=messages_gpt, stream=True, tools=tools_gpt_ollama)\n",
"\n",
"            # A streamed tool call arrives in fragments: the first carries the id and\n",
"            # function name, later ones append pieces of the JSON arguments string.\n",
"            # Collect them per index and only run the tool once the stream has ended.\n",
"            pending_tool_calls = {}\n",
"            for chunk in response_stream:\n",
"                if not chunk.choices:\n",
"                    continue\n",
"                delta = chunk.choices[0].delta\n",
"                final_response_content += delta.content or \"\"\n",
"                for fragment in delta.tool_calls or []:\n",
"                    pending = pending_tool_calls.setdefault(fragment.index, {\"id\": None, \"name\": \"\", \"arguments\": \"\"})\n",
"                    if fragment.id:\n",
"                        pending[\"id\"] = fragment.id\n",
"                    if fragment.function:\n",
"                        pending[\"name\"] += fragment.function.name or \"\"\n",
"                        pending[\"arguments\"] += fragment.function.arguments or \"\"\n",
"\n",
"            if pending_tool_calls:\n",
"                completed_calls = [pending_tool_calls[index] for index in sorted(pending_tool_calls)]\n",
"                messages_gpt.append({\n",
"                    \"role\": \"assistant\",\n",
"                    \"content\": final_response_content or None,\n",
"                    \"tool_calls\": [\n",
"                        {\n",
"                            \"id\": completed[\"id\"],\n",
"                            \"type\": \"function\",\n",
"                            \"function\": {\"name\": completed[\"name\"], \"arguments\": completed[\"arguments\"]}\n",
"                        }\n",
"                        for completed in completed_calls\n",
"                    ]\n",
"                })\n",
"                for completed in completed_calls:\n",
"                    response_tool = handle_tool_call(\n",
"                        {\"function\": {\"name\": completed[\"name\"], \"arguments\": completed[\"arguments\"]}},\n",
"                        user_message\n",
"                    )\n",
"                    # Each tool message must reference the id of the call it answers\n",
"                    response_tool[\"tool_call_id\"] = completed[\"id\"]\n",
"                    messages_gpt.append(response_tool)\n",
"\n",
"                response_stream_after_tool = openai.chat.completions.create(model=MODEL_GPT, messages=messages_gpt, stream=True)\n",
"                for chunk_after_tool in response_stream_after_tool:\n",
"                    if chunk_after_tool.choices:\n",
"                        final_response_content += chunk_after_tool.choices[0].delta.content or \"\"\n",
"\n",
"        history[-1][1] = final_response_content\n",
"\n",
"        # Only speak when there is something to say\n",
"        if use_audio_output != \"No\" and final_response_content:\n",
"            talker(final_response_content)\n",
"\n",
"        return history\n",
"\n",
"    def transcribe_and_chat(audio_file, history, target_model, use_audio_output):\n",
"        \"\"\"\n",
"        Transcribe a recording and answer it as if it had been typed.\n",
"\n",
"        Returns the updated history, or the history unchanged when there is no\n",
"        recording or nothing could be transcribed from it.\n",
"        \"\"\"\n",
"        if not audio_file:\n",
"            return history\n",
"\n",
"        transcribed_text = transcribe_audio(audio_file)\n",
"        if not transcribed_text or not transcribed_text.strip():\n",
"            return history\n",
"\n",
"        new_history = (history or []) + [[transcribed_text, None]]\n",
"        return chat_with_assistant(new_history, target_model, use_audio_output)\n",
"\n",
"    # Pressing Enter in the textbox and clicking Submit run the same two steps:\n",
"    # show the user's message immediately, then fetch the assistant's reply\n",
"    for text_trigger in (entry.submit, submit_btn.click):\n",
"        text_trigger(\n",
"            user_message_updater,\n",
"            inputs=[entry, chatbot],\n",
"            outputs=[entry, chatbot],\n",
"            queue=False\n",
"        ).then(\n",
"            chat_with_assistant,\n",
"            inputs=[chatbot, model_dropdown, audio_dropdown],\n",
"            outputs=[chatbot]\n",
"        )\n",
"\n",
"    # The audio component's stop event and the Submit Audio button share one handler\n",
"    for audio_trigger in (audio_input.stop, submit_audio_btn.click):\n",
"        audio_trigger(\n",
"            transcribe_and_chat,\n",
"            inputs=[audio_input, chatbot, model_dropdown, audio_dropdown],\n",
"            outputs=[chatbot],\n",
"            queue=False\n",
"        )\n",
"\n",
"    clear.click(lambda: None, inputs=None, outputs=[chatbot], queue=False)\n",
"\n",
"ui.launch(inbrowser=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb23b6cb-27af-43d6-8234-fe8295e7fe57",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}