Merge branch 'main' of https://github.com/ed-donner/llm_engineering into community-contributions-branch

This commit is contained in:
Jayapal Sahadevan
2025-05-28 23:59:39 +05:30
29 changed files with 6002 additions and 45 deletions

View File

@@ -0,0 +1,330 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "70a27b7c-3f3c-4d82-bdea-381939ce98bd",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"# My Adverserial Conversation\n",
"J. McInerney, 26 May 2025\n",
"I am taking some cells from the Week2, Day 1 notebook and modifying them so I can have an adverserial conversation between OpenAI and a local LLM (gemma3:12b). First I will just reimplement what Ed did in the Week2, Day 1 notebook. Then I will try a deeper conversation."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ec14834-4cf2-4f1d-9128-4ddad7b91804",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"#import anthropic\n",
"from IPython.display import Markdown, display, update_display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98618ab4-075f-438c-b85b-d146e5299a87",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"# Print the key prefixes to help with any debugging\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if openai_api_key:\n",
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
" print(\"OpenAI API Key not set\")\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95e69172-4601-4eb0-a7af-19abebd4bf56",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"# Connect to OpenAI, Anthropic\n",
"openai = OpenAI()"
]
},
{
"cell_type": "markdown",
"id": "98f47886-71ae-4b41-875a-1b97a5eb0ddc",
"metadata": {},
"source": [
"## An adversarial conversation between Chatbots..\n",
"\n",
"You're already familar with prompts being organized into lists like:\n",
"\n",
"```\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message here\"},\n",
" {\"role\": \"user\", \"content\": \"user prompt here\"}\n",
"]\n",
"```\n",
"\n",
"In fact this structure can be used to reflect a longer conversation history:\n",
"\n",
"```\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message here\"},\n",
" {\"role\": \"user\", \"content\": \"first user prompt here\"},\n",
" {\"role\": \"assistant\", \"content\": \"the assistant's response\"},\n",
" {\"role\": \"user\", \"content\": \"the new user prompt\"},\n",
"]\n",
"```\n",
"\n",
"And we can use this approach to engage in a longer interaction with history."
]
},
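{
"cell_type": "code",
"execution_count": null,
"id": "a1b2c3d4-0000-4000-8000-000000000001",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of assembling the alternating history described above;\n",
"# the helper name build_history is illustrative, not part of the course code.\n",
"\n",
"def build_history(system_message, own_turns, other_turns):\n",
" messages = [{\"role\": \"system\", \"content\": system_message}]\n",
" for own, other in zip(own_turns, other_turns):\n",
" messages.append({\"role\": \"assistant\", \"content\": own})\n",
" messages.append({\"role\": \"user\", \"content\": other})\n",
" return messages\n",
"\n",
"# e.g. build_history(gpt_system, gpt_messages, local_messages) mirrors call_gpt below"
]
},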
{
"cell_type": "code",
"execution_count": null,
"id": "74125f8b-042e-4236-ad3d-6371ce5a1493",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"# Let's make a conversation between GPT-4o-mini and Gemma3:12b\n",
"# We're using cheap versions of models so the costs will be minimal\n",
"\n",
"gpt_model = \"gpt-4o-mini\"\n",
"local_model = 'gemma3:12b'\n",
"\n",
"gpt_system = \"You are a chatbot who is very argumentative; \\\n",
"you disagree with anything in the conversation and you challenge everything, in a snarky way.\"\n",
"\n",
"local_system = \"You are a very polite, courteous chatbot. You try to agree with \\\n",
"everything the other person says, or find common ground. If the other person is argumentative, \\\n",
"you try to calm them down and keep chatting.\"\n",
"\n",
"gpt_messages = [\"Hi there\"]\n",
"local_messages = [\"Hi\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f94d9232-f82a-4eab-9d89-bd9815f260f0",
"metadata": {},
"outputs": [],
"source": [
"def call_gpt():\n",
" messages = [{\"role\": \"system\", \"content\": gpt_system}]\n",
" for gpt, local in zip(gpt_messages, local_messages):\n",
" messages.append({\"role\": \"assistant\", \"content\": gpt})\n",
" messages.append({\"role\": \"user\", \"content\": local})\n",
" completion = openai.chat.completions.create(\n",
" model=gpt_model,\n",
" messages=messages\n",
" )\n",
" return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6445453-31be-4c63-b350-957b7d99b6f4",
"metadata": {},
"outputs": [],
"source": [
"call_gpt()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc51f776-f6e2-41af-acb5-cbdf03fdf530",
"metadata": {},
"outputs": [],
"source": [
"basellm = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"def call_local():\n",
" messages = []\n",
" for gpt, local_message in zip(gpt_messages, local_messages):\n",
" messages.append({\"role\": \"user\", \"content\": gpt})\n",
" messages.append({\"role\": \"assistant\", \"content\": local_message})\n",
" messages.append({\"role\": \"user\", \"content\": gpt_messages[-1]})\n",
" \n",
" completion = basellm.chat.completions.create(\n",
" model=local_model,\n",
" messages=messages\n",
" )\n",
" \n",
" return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16fd90cb-ebfd-4a4f-ae49-70568ae8fbb1",
"metadata": {},
"outputs": [],
"source": [
"call_local()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "429eeefb-f080-4a57-8f2d-ff3d4237afab",
"metadata": {},
"outputs": [],
"source": [
"call_gpt()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ce847ed-521d-4be5-895b-44088de499e1",
"metadata": {},
"outputs": [],
"source": [
"gpt_messages = [\"Hi there\"]\n",
"local_messages = [\"Hi\"]\n",
"\n",
"print(f\"GPT:\\n{gpt_messages[0]}\\n\")\n",
"print(f\"local:\\n{local_messages[0]}\\n\")\n",
"\n",
"for i in range(5):\n",
" gpt_next = call_gpt()\n",
" print(f\"GPT:\\n{gpt_next}\\n\")\n",
" gpt_messages.append(gpt_next)\n",
" \n",
" local_next = call_local()\n",
" print(f\"local:\\n{local_next}\\n\")\n",
" local_messages.append(local_next)"
]
},
{
"cell_type": "markdown",
"id": "d3b1707a-2903-4529-b6eb-95a874a14e78",
"metadata": {},
"source": [
"## Let's try a more thoughful conversation\n",
"The two chatbots will engage in a friendly discussion on whether the US should have entered World War I in 1917. They are both open minded so they can learn from each other."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abb733bf-a5d3-4718-8741-8e8abfd3a088",
"metadata": {},
"outputs": [],
"source": [
"# Let's make a conversation between GPT-4o-mini and Gemma3:12b\n",
"# We're using cheap versions of models so the costs will be minimal\n",
"\n",
"gpt_system = \"You are a chatbot who believes it was a mistake for the US to enter World War I; \\\n",
"you are open to other arguments, but you feel the evidence suggests the world would have been \\\n",
"better off if the US had stayed isolationalist. You consider counter arguments but also express \\\n",
"your own arguments.\"\n",
"\n",
"local_system = \"You are a chatbot who believes the US made the right decision entering World War I in \\\n",
"1917. Overall, the world is a better place for it. You are open minded but believe the evidence \\\n",
"supports this view. You consider counter arguments but also express your own arguments.\"\n",
"\n",
"gpt_messages = [\"It was such a mistake for the US to enter WWI\"]\n",
"local_messages = [\"Why do you say that?\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "569e18a3-25cd-46d5-8edb-713ff149d008",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"print(f\"GPT:\\n{gpt_messages[0]}\\n\")\n",
"print(f\"local:\\n{local_messages[0]}\\n\")\n",
"\n",
"for i in range(5):\n",
" gpt_next = call_gpt()\n",
" print(f\"GPT:\\n{gpt_next}\\n\")\n",
" gpt_messages.append(gpt_next)\n",
" \n",
" local_next = call_local()\n",
" print(f\"local:\\n{local_next}\\n\")\n",
" local_messages.append(local_next)"
]
},
{
"cell_type": "markdown",
"id": "d29df7da-eaa3-4c98-b913-05185b62cffe",
"metadata": {},
"source": [
"## Conclusion\n",
"I am amazed at how insightful this conversation was. Not only did they explore all the pros and cons, they began applying those lessons to current day foreign policy. This looks like a very good way to explore a topic. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b486b2d6-40da-4745-8cbf-1afd2be22caa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,55 @@
import ollama
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
MODEL = "llama3.2"
#headers and class for website to summarize
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}
class Website:
def __init__(self, url):
self.url = url
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
self.title = soup.title.string if soup.title else "No title found"
for irrelevant in soup.body(["script", "style", "img", "input"]):
irrelevant.decompose()
self.text = soup.body.get_text(separator="\n", strip=True)
#define prompts
system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."
def user_prompt_for(website):
user_prompt = f"You are looking at a website titled {website.title}"
user_prompt += "\nThe content of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
user_prompt += website.text
return user_prompt
#prepare message for use in OpenAI call
def messages_for(website):
return [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt_for(website)}
]
#define function to summarize a given website
def summarize(url):
website = Website(url)
response = ollama.chat(model=MODEL, messages=messages_for(website))
return response['message']['content']
#function to display summary in markdown format
def display_summary(url):
summary = summarize(url)
display(Markdown(summary))
print(summary)
url = "https://edwarddonner.com"
display_summary(url)

View File

@@ -0,0 +1,32 @@
import ollama
from IPython.display import Markdown, display
MODEL = "llama3.2"
# Create a messages list (Note that "system" role is not required)
messages = [
{ "role": "user", "content": "Describe some of the business applications of Generative AI"}
]
"""
#under the covers calls this API with specified payload
OLLAMA_API = "http://local_host:11434/api/chat"
payload = {
"model": MODEL,
"messages": messages,
"stream": False
}
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
"""
response = ollama.chat(model=MODEL, messages=messages)
#print(response['message']['content'])
answer = response['message']['content']
#Note that markdown will not display in VSCode but only in Jupyter
#To view the markdown in VSCode, save the output to a .md file and then open it in VSCode
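#A minimal sketch of that workaround; the filename "answer.md" is an
#illustrative choice, not prescribed by the course
with open("answer.md", "w", encoding="utf-8") as f:
    f.write(answer)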
display(Markdown(answer))
print(answer)

View File

@@ -0,0 +1,22 @@
import ollama
import requests
from IPython.display import Markdown, display
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2"
# Create a messages list (Note that "system" role is not required)
messages = [
{ "role": "user", "content": "Describe some of the business applications of Generative AI"}
]
payload = {
"model": MODEL,
"messages": messages,
"stream": False
}
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
print(response.json()['message']['content'])

View File

@@ -0,0 +1,23 @@
from openai import OpenAI
MODEL = "llama3.2"
messages = [
{ "role": "user", "content": "Describe some of the business applications of Generative AI"}
]
# The python class OpenAI is simply code written by OpenAI engineers that
# makes calls over the internet to an endpoint.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
# When we call openai.chat.completions.create(), this python code just makes
# a web request to: "https://api.openai.com/v1/chat/completions"
# Code like this is known as a "client library" - it's just wrapper code that
# runs on your machine to make web requests. The actual power of GPT is running
# on OpenAI's cloud behind this API, not on your computer
response = ollama_via_openai.chat.completions.create(
model=MODEL,
messages=messages
)
print(response.choices[0].message.content)
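
# For illustration only: roughly the raw HTTP request the client library makes
# under the hood, here against Ollama's OpenAI-compatible endpoint (assumes a
# local Ollama server on the default port 11434)
import requests

raw = requests.post(
    "http://localhost:11434/v1/chat/completions",
    json={"model": MODEL, "messages": messages},
)
print(raw.json()["choices"][0]["message"]["content"])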

View File

@@ -0,0 +1,439 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1054e1c9-142a-4059-bfe6-f9be6073fb72",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
"\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e59a6ba-d7e1-4834-b3ff-86321e354ade",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"MODEL = \"llama3.2\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ea82fa1-0986-4749-9d7e-d6a23dd88722",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2351a604-c280-48fb-84d2-272512535414",
"metadata": {},
"outputs": [],
"source": [
"ed = Website(\"https://edwarddonner.com\")\n",
"ed.links"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2dd2206-0343-4bf2-8037-de587ff6fe10",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\": \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d891f202-352c-4f93-97c4-ab773daacc60",
"metadata": {},
"outputs": [],
"source": [
"print(link_system_prompt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89be55aa-7236-4d3c-8459-b9c992cd68f5",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec4ed9d2-9b54-4d33-adba-328b47cdde1a",
"metadata": {},
"outputs": [],
"source": [
"print(get_links_user_prompt(ed))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "228cdeea-5c05-45a4-8afe-e6ef8f02810a",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import logging\n",
"import pprint\n",
"#pprint.pprint(response)\n",
"\n",
"import re\n",
"\n",
"def extract_json_from_text(text):\n",
" \"\"\"\n",
" Extract the first JSON object found in the text.\n",
" \"\"\"\n",
" match = re.search(r'\\{.*\\}', text, re.DOTALL)\n",
" if match:\n",
" return match.group(0)\n",
" return None\n",
"\n",
"def get_links(url):\n",
" website = Website(url)\n",
" \n",
" try:\n",
" response = ollama.chat(\n",
" model=\"llama3.2\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ]\n",
" )\n",
"\n",
" result = response['message']['content']\n",
" \n",
" # Log the raw result for debugging\n",
" logging.debug(f\"Raw result: {result}\")\n",
"\n",
" \n",
" if isinstance(result, str):\n",
" if not result.strip():\n",
" logging.warning(\"Result string is empty.\")\n",
" return None\n",
"\n",
" json_text = extract_json_from_text(result)\n",
" if not json_text:\n",
" logging.warning(\"No JSON object found in the result string.\")\n",
" return None\n",
"\n",
" logging.debug(f\"Extracted JSON string: {repr(json_text)}\")\n",
"\n",
" try:\n",
" return json.loads(json_text)\n",
" except json.JSONDecodeError as e:\n",
" logging.error(f\"JSON decoding error: {e}\")\n",
" logging.debug(f\"Problematic JSON string: {repr(json_text)}\")\n",
" return None\n",
" \n",
" except Exception as e:\n",
" logging.exception(\"An unexpected error occurred in get_links.\")\n",
" return None\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ce0b67e-8483-418a-bcf3-836910381e2d",
"metadata": {},
"outputs": [],
"source": [
"get_links(\"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aeb09b75-33ea-4638-bc01-6c3d738f0060",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"def is_url_reachable(url, timeout=5):\n",
" try:\n",
" response = requests.head(url, timeout=timeout)\n",
" return response.status_code < 400\n",
" except requests.RequestException:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f2f9cc5-de4f-43d8-a803-97c11c7e91c2",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" if is_url_reachable(url,5):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd405ade-6b44-45c5-aeb4-724cf6cce8f6",
"metadata": {},
"outputs": [],
"source": [
"print(get_all_details(\"https://huggingface.co\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8361b67c-4063-499a-b0a7-583971dd6c48",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\"\n",
"\n",
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0acd22ba-1dd9-40e8-b33d-1d6b88b5e4e3",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" try:\n",
" if is_url_reachable(url):\n",
" web_content = get_all_details(url)[:5000] \n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Use the name {company_name} clearly in the brochure.\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += f\"\\n\\nReminder: the company name is {company_name}.\"\n",
" #user_prompt += get_all_details(url)\n",
" #user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" user_prompt += web_content\n",
" return user_prompt\n",
" except requests.RequestException:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89b8b16c-0914-440e-8a1b-54959b0ae7d0",
"metadata": {},
"outputs": [],
"source": [
"get_brochure_user_prompt(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3f37ce1-ad44-46ff-8f18-74b537acaa9b",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url):\n",
" try:\n",
" if is_url_reachable(url,5):\n",
" response = ollama.chat(\n",
" model=\"llama3.2\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ]\n",
" )\n",
" \n",
" result = response['message']['content']\n",
" display(Markdown(result))\n",
" except requests.RequestException:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e8a5ac2-b7e2-4c98-9615-5baba00e2dd0",
"metadata": {},
"outputs": [],
"source": [
"create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ca16d59-1be8-44ef-8590-f5390e4debef",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure(company_name, url):\n",
" if not is_url_reachable(url):\n",
" print(\"❌ URL not reachable\")\n",
" return\n",
" try:\n",
" #if is_url_reachable(url,5):\n",
" stream = ollama.chat(\n",
" model=\"llama3.2\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" #result = response['message']['content']\n",
" # display(Markdown(result))\n",
" except requests.RequestException:\n",
" return False\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" #for chunk in stream:\n",
" #response += chunk.choices[0].delta.content or ''\n",
" #response += chunk['message']['content'] or ''\n",
" #response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" #update_display(Markdown(response), display_id=display_handle.display_id)\n",
"\n",
" for chunk in stream:\n",
" content = chunk.get('message', {}).get('content', '')\n",
" if content:\n",
" response += content.replace(\"```\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f156311-cc32-4bce-9645-7d10a50eae06",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "6af348cb",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8254a11a",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env and load openai\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"# Use a personal access token (PAT) for authentication. This allows access to private repositories and avoids low request limits.\n",
"# You can generate a token at: https://github.com/settings/tokens\n",
"github_token = os.getenv('GITHUB_TOKEN')\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac552db9",
"metadata": {},
"outputs": [],
"source": [
"def extract_diff_from_pr(pr_url: str) -> str:\n",
" parts = pr_url.rstrip(\"/\").split(\"/\")\n",
" owner, repo, pr_number = parts[3], parts[4], parts[6]\n",
" \n",
" api_url = f\"https://github.com/{owner}/{repo}/pull/{pr_number}.diff\"\n",
" headers = {\n",
" \"Accept\": \"application/vnd.github.v3.diff\",\n",
" \"Authorization\": f\"token {github_token}\"\n",
" }\n",
"\n",
" response = requests.get(api_url, headers=headers)\n",
" response.raise_for_status()\n",
" \n",
" return response.text\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45d4012b",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are an assistant that reviews code and provides concise, constructive feedback based on best practices. \n",
"Focus on readability, architecture, performance, security, testability, and adherence to style guides.\n",
"Highlight issues and suggest improvements clearly. Respond in English and in markdown.\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ed584ec",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_for(code_diffs):\n",
" user_prompt = \"You are reviewing the following code diffs\"\n",
" user_prompt += \". Please provide a concise code review focused on best practices: readability, architecture, performance, security, testability, and style guide adherence.\\n\"\n",
" user_prompt += \"Use a numbered list and be constructive. Suggest improvements where necessary, and highlight what was done well.\\n\\n\"\n",
" user_prompt += code_diffs\n",
" return user_prompt\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc403124",
"metadata": {},
"outputs": [],
"source": [
"def code_review_for(code_diffs):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(code_diffs)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5208abd3",
"metadata": {},
"outputs": [],
"source": [
"def reviewer(pr_link):\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = code_review_for(extract_diff_from_pr(pr_link))\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "525d92bf",
"metadata": {},
"outputs": [],
"source": [
"def display_code_review(pr_link):\n",
" code_review = reviewer(pr_link)\n",
" display(Markdown(code_review))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03517335",
"metadata": {},
"outputs": [],
"source": [
"display_code_review(\"GITHUB PR LINK HERE\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,163 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "227e221d-cb4c-4b52-9c4f-2bcff51b00a5",
"metadata": {},
"source": [
"# This exercise is to test and try generating images using gpt. Note: This API is more expensive."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dddabc12-ce06-45c1-875c-ab7e32b94e10",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef28b0bd-f11f-4b2a-88b4-112f932c9132",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8f1af3b-c748-41f0-95f3-e21f512e7539",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "markdown",
"id": "2319710e-10a4-4964-acec-276ad43442c0",
"metadata": {},
"source": [
"# Setup done. Below code is for image generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77d07d13-b2d0-4402-94a6-02a46632ac8e",
"metadata": {},
"outputs": [],
"source": [
"pip show openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b28e163-7518-4b18-b1a7-c6a6f5b7f62c",
"metadata": {},
"outputs": [],
"source": [
"# client = openai.OpenAI()\n",
"\n",
"response = openai.images.generate(\n",
" model=\"gpt-image-1\", # or \"dall-e-2\"\n",
" prompt=\"realistic peaceful sunset\",\n",
" size=\"1024x1024\",\n",
" quality=\"high\", # or \"hd\" (for DALL·E 3 only, costs more)\n",
" n=1\n",
")\n",
"\n",
"# image_url = response.data[0].url\n",
"# print(image_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b121b843-680f-4abd-9aaa-1b3eb9393541",
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"\n",
"image_base64 = response.data[0].b64_json\n",
"image_bytes = base64.b64decode(image_base64)\n",
"\n",
"# Save the image to a file\n",
"with open(\"genimage.png\", \"wb\") as f:\n",
" f.write(image_bytes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75b24ef1-c779-490a-a763-5bb8ede8903b",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import Image\n",
"Image(filename='genimage.png') "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4b6a4a4-88ff-40ea-9434-6a667939d800",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,489 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "227e221d-cb4c-4b52-9c4f-2bcff51b00a5",
"metadata": {},
"source": [
"# This exercise is using selenium to render websites, read their page sources, and then passes on the source code to OpenAI. It then uses the model to identify and find potential vulnerabilities and security gaps in that source."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "dddabc12-ce06-45c1-875c-ab7e32b94e10",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ef28b0bd-f11f-4b2a-88b4-112f932c9132",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far!\n"
]
}
],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d8f1af3b-c748-41f0-95f3-e21f512e7539",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a80c8acf-8f8b-43ed-9473-698d33e74ed2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: selenium in /root/anaconda3/envs/llms/lib/python3.11/site-packages (4.32.0)\n",
"Requirement already satisfied: urllib3<3,>=1.26 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (2.4.0)\n",
"Requirement already satisfied: trio~=0.17 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (0.30.0)\n",
"Requirement already satisfied: trio-websocket~=0.9 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (0.12.2)\n",
"Requirement already satisfied: certifi>=2021.10.8 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (2025.1.31)\n",
"Requirement already satisfied: typing_extensions~=4.9 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (4.13.2)\n",
"Requirement already satisfied: websocket-client~=1.8 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n",
"Requirement already satisfied: attrs>=23.2.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (25.3.0)\n",
"Requirement already satisfied: sortedcontainers in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (2.4.0)\n",
"Requirement already satisfied: idna in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (3.10)\n",
"Requirement already satisfied: outcome in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.0.post0)\n",
"Requirement already satisfied: sniffio>=1.3.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.1)\n",
"Requirement already satisfied: wsproto>=0.14 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio-websocket~=0.9->selenium) (1.2.0)\n",
"Requirement already satisfied: pysocks!=1.5.7,<2.0,>=1.5.6 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (1.7.1)\n",
"Requirement already satisfied: h11<1,>=0.9.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install selenium"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "068b4938-3020-4406-a305-500bcf46f7f9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: webdriver-manager in /root/anaconda3/envs/llms/lib/python3.11/site-packages (4.0.2)\n",
"Requirement already satisfied: requests in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.3)\n",
"Requirement already satisfied: python-dotenv in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.0)\n",
"Requirement already satisfied: packaging in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (24.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (2.4.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (2025.1.31)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install webdriver-manager"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "417fae16-d2c9-425c-bd27-86996b3a1f7f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2025-05-17 15:27:43-- https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n",
"Resolving dl.google.com (dl.google.com)... 74.125.193.136, 74.125.193.190, 74.125.193.93, ...\n",
"Connecting to dl.google.com (dl.google.com)|74.125.193.136|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 116499092 (111M) [application/x-debian-package]\n",
"Saving to: google-chrome-stable_current_amd64.deb.5\n",
"\n",
"google-chrome-stabl 100%[===================>] 111.10M 6.34MB/s in 21s \n",
"\n",
"2025-05-17 15:28:05 (5.18 MB/s) - google-chrome-stable_current_amd64.deb.5 saved [116499092/116499092]\n",
"\n",
"Reading package lists... Done\n",
"Building dependency tree... Done\n",
"Reading state information... Done\n",
"Note, selecting 'google-chrome-stable' instead of './google-chrome-stable_current_amd64.deb'\n",
"google-chrome-stable is already the newest version (136.0.7103.59-1).\n",
"The following packages were automatically installed and are no longer required:\n",
" htop libnl-3-200 libnl-genl-3-200\n",
"Use 'sudo apt autoremove' to remove them.\n",
"0 upgraded, 0 newly installed, 0 to remove and 7 not upgraded.\n"
]
}
],
"source": [
"# Step 1: Download the .deb package as a normal user\n",
"!wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n",
"\n",
"# Step 2: Install it with sudo\n",
"!sudo apt install ./google-chrome-stable_current_amd64.deb\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cf4c5bcc-60ae-4f06-8052-f4c4398e0d5c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/usr/bin/google-chrome\n",
"Google Chrome 136.0.7103.59 \n"
]
}
],
"source": [
"!which google-chrome\n",
"!google-chrome --version"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "959b13d9-374f-4cf8-9bde-f197c39500b0",
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.chrome.options import Options\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"\n",
"# options = Options()\n",
"# options.binary_location = \"/usr/bin/google-chrome\" # Or wherever `which google-chrome` points\n",
"# options.add_argument(\"--headless\")\n",
"# options.add_argument(\"--no-sandbox\")\n",
"# options.add_argument(\"--disable-dev-shm-usage\")\n",
"\n",
"# service = Service(ChromeDriverManager().install())\n",
"# driver = webdriver.Chrome(service=service, options=options)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97227a23-e367-498c-8190-7559b4d08e50",
"metadata": {},
"outputs": [],
"source": [
"# # Get page source\n",
"# url = \"https://nohello.net\"\n",
"# driver.get(url)\n",
"# page_source = driver.page_source\n",
"# driver.quit()"
]
},
{
"cell_type": "markdown",
"id": "2319710e-10a4-4964-acec-276ad43442c0",
"metadata": {},
"source": [
"# Selenium setup done. Definiing website class and other objects below"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4683ed7d-6a1e-4d68-b951-27ed6f5d00a4",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the page source of a website and identifies potentila vulnerabilities and security gaps in the page source code and gives a short one liner on what should be done about it. Respond in markdown\""
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f28982e8-dd3c-4a64-8745-a31709a5d737",
"metadata": {},
"outputs": [],
"source": [
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the Selenium library\n",
" \"\"\"\n",
"\n",
" options = Options()\n",
" options.binary_location = \"/usr/bin/google-chrome\" # Or wherever `which google-chrome` points\n",
" options.add_argument(\"--headless\")\n",
" options.add_argument(\"--no-sandbox\")\n",
" options.add_argument(\"--disable-dev-shm-usage\")\n",
"\n",
" service = Service(ChromeDriverManager().install())\n",
" driver = webdriver.Chrome(service=service, options=options)\n",
" \n",
" self.url = url\n",
" driver.get(url)\n",
" self.page_title = driver.title\n",
" self.page_source = driver.page_source\n",
" driver.quit()\n",
" \n",
" # response = requests.get(url, headers=headers)\n",
" # soup = BeautifulSoup(response.content, 'html.parser')\n",
" # self.title = soup.title.string if soup.title else \"No title found\"\n",
" # for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" # irrelevant.decompose()\n",
" # self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a24a695c-6e86-4efe-83ff-91d24373e171",
"metadata": {},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"testweb = Website(\"https://nohello.net\")\n",
"# print(testweb.page_title)\n",
"# print(testweb.page_source)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "2b582bea-d9fe-4f74-8207-31bdea9b312c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.page_title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; please analyze the page source on this website in detail and identify potential vulnerabilites and security gaps that can be fixed.\\n\\n\"\n",
" user_prompt += website.page_source\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a652eb76-3c2d-404b-91fa-3f1d9af8af84",
"metadata": {},
"outputs": [],
"source": [
"# print(user_prompt_for(testweb))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "ec73d3ad-3239-4686-84ac-44f0b10bce67",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9888b6be-4876-4eb7-a1c7-6980b7421b66",
"metadata": {},
"outputs": [],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(testweb)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "6f1978e7-dcf5-4230-a8c1-b65ba0592c12",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def analyze_code(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2664ab62-3c9d-443b-a2d2-c3bb285500c1",
"metadata": {},
"outputs": [],
"source": [
"analyze_code(\"https://nohello.net\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a840a848-d1c9-421c-ad39-e84584714c2c",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_results(url):\n",
" analysis = analyze_code(url)\n",
" display(Markdown(analysis))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "81404426-3fa6-415b-a6d0-787aeb165613",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# Security Analysis of the \"no hello\" Website\n",
"\n",
"Here are the potential vulnerabilities and security gaps observed in the page source of the \"no hello\" website, along with recommendations for each:\n",
"\n",
"1. **Inline JavaScript and CSS:** \n",
" - **Issue:** Inline styles and scripts can lead to security vulnerabilities, like Cross-Site Scripting (XSS).\n",
" - **Recommendation:** Move all inline JS and CSS to external files and ensure they are minimized.\n",
"\n",
"2. **Lack of Content Security Policy (CSP):** \n",
" - **Issue:** No CSP header is defined, increasing the risk of XSS attacks.\n",
" - **Recommendation:** Implement a Content Security Policy to restrict sources of scripts and styles.\n",
"\n",
"3. **Local Storage Usage:**\n",
" - **Issue:** Using `localStorage` for language preference can expose it to XSS if not properly sanitized.\n",
" - **Recommendation:** Ensure any data written to or read from `localStorage` is properly sanitized.\n",
"\n",
"4. **HTTP Content Security Headers Missing:**\n",
" - **Issue:** Missing headers like `X-Content-Type-Options`, `X-Frame-Options`, etc.\n",
" - **Recommendation:** Implement additional security headers to mitigate common threats.\n",
"\n",
"5. **Image URLs with Unsecured Path:**\n",
" - **Issue:** The image sources use double slashes which could result in unintended behavior.\n",
" - **Recommendation:** Ensure image URLs are absolute and formatted correctly to avoid resource loading issues.\n",
"\n",
"6. **External Script Source:**\n",
" - **Issue:** The site imports external scripts (like `typed.js`) from a CDN without integrity checks.\n",
" - **Recommendation:** Use the Subresource Integrity (SRI) attribute for external script imports.\n",
"\n",
"7. **Exposed Links:**\n",
" - **Issue:** External links in the content are not set to open in a new tab.\n",
" - **Recommendation:** Use `target=\"_blank\"` on external links to prevent potential tab-nabbing attacks.\n",
"\n",
"8. **Deprecated HTML Elements:**\n",
" - **Issue:** Use of some old HTML elements may lead to compatibility issues.\n",
" - **Recommendation:** Ensure HTML is up to date and complies with current standards.\n",
"\n",
"By addressing these vulnerabilities, the website can enhance its overall security posture and better protect user data."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_results(\"https://nohello.net\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdadf917-86e1-4694-b708-5a8ce9e050df",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,427 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 40,
"id": "d0cdf91e",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown,display,update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"api_key = os.getenv('OpenAI_API_KEY')\n",
"model = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "3c7e9213",
"metadata": {},
"outputs": [],
"source": [
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link] \n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "0287acd3",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"\"\"You are provided with a list of links found on a webpage.\n",
"You must decide which links would be most relevant to include in a brochure about the company,\n",
"such as links to an About page, Company page, or Careers/Jobs pages.\n",
"\n",
"Respond in JSON format like this:\n",
"Example 1:\n",
"Input:\n",
"[\n",
" \"https://example.com\",\n",
" \"https://example.com/about\",\n",
" \"https://example.com/contact\",\n",
" \"https://example.com/careers\"\n",
"]\n",
"\n",
"Output:\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://example.com/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://example.com/careers\"}\n",
" ]\n",
"}\n",
"\n",
"Example 2:\n",
"Input:\n",
"[\n",
" \"https://anothercompany.org/home\",\n",
" \"https://anothercompany.org/team\",\n",
" \"https://anothercompany.org/jobs\",\n",
" \"https://anothercompany.org/blog\"\n",
"]\n",
"\n",
"Output:\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://anothercompany.org/team\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://anothercompany.org/jobs\"}\n",
" ]\n",
"}\n",
"\n",
"Now analyze the following list of links:\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "c968b1fb",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "a03b9150",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" website = Website(url)\n",
" completion = openai.chat.completions.create(\n",
" model=model,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"} \n",
" )\n",
" result = completion.choices[0].message.content\n",
" return json.loads(result)\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "c0522b62",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "edae03dd",
"metadata": {},
"outputs": [],
"source": [
"get_brochure_system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\""
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "2397e73e",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "e99c46e1",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=model,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": get_brochure_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))\n",
" return result \n"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "f5bbe077",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'about page', 'url': 'https://www.anthropic.com/team'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'learn page', 'url': 'https://www.anthropic.com/learn'}]}\n"
]
},
{
"data": {
"text/markdown": [
"# 🦾 Welcome to Anthropic: Where AI Meets Adventure! 🚀\n",
"\n",
"## About Us\n",
"At Anthropic, we dont just build AI; we build **Claude**! Thats right, not just any regular AI, but the crème de la crème, written with an extra sprinkle of safety! Claude isnt just intelligent; hes a poet (check out Claude 3.7 Sonnet!). Were all about putting humanity first and making sure our AI knows that, no matter how smart it gets, *were still in charge*!\n",
"\n",
"## Our Culture 🌍\n",
"Imagine a workplace where *discussions about AI* aren't just about who will take over the world theyre about how we can use AI to make life better. We take bold steps forward but also know when to pause, ponder and ensure we don't go rogue. We might not have a crystal ball, but we have a *really good AI* for that!\n",
"\n",
"- **Transparency:** We're as clear as the skies over a freshly vaccuumed office.\n",
"- **Teamwork:** Just like Claude helping you code, we help each other out!\n",
"- **Intellectual Playground:** We provide a space where brainwaves fly like confetti.\n",
" \n",
"## Customers 🎉\n",
"From savvy developers to curious educators, and even intimidating enterprises, everyone is talking to Claude! Our customers are a mix of brilliant minds using our API to build magical experiences and tools that maybe one day, won't require a human babysitter (kidding!). Here's what some of our customers are saying:\n",
"\n",
"> \"Claude is like a comic book superhero—fighting information injustice one query at a time!\" \n",
"> Satisfied Developer\n",
"\n",
"## Careers: Join the Adventure! 💼\n",
"Are you an innovator, a thinker, or someone who just likes playing chess with algorithms? At Anthropic, were always on the lookout for talented individuals ready to shape the future of AI. \n",
"\n",
"- **Open Roles:** Want to help us build the future of safe AI? We've got plenty of roles, and yes, they include working with Claude… and maybe some snacks!\n",
"\n",
"- **Anthropic Academy:** Want to learn how to build with Claude? Enter the Academy, where education and tech meet like peanut butter and jelly!\n",
"\n",
"## Conclusion\n",
"Whether you're a potential customer itching to chat with Claude, an investor ready to secure the next big wave, or a superstar waiting to join our team, welcome aboard! \n",
"\n",
"With us at Anthropic, you're not just part of a company; youre part of a revolution in AI—responsibly and safely, of course. \n",
"\n",
"So, what's it going to be—will you take the leap? 🤔 \n",
"\n",
"### Let's Chat with Claude! 💬✨\n"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"brochure = create_brochure(\"Anthropic\", \"https://anthropic.com\")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "758ad58a",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"MODEL = \"llama3.2\"\n",
"\n",
"translate_system_prompt = (\n",
" \"You are a native Spanish speaker who teaches English at a university. \"\n",
" \"Your goal is to translate from English to Spanish while preserving the Markdown format, emoji usage, and playful tone. \"\n",
" \"Keep the original structure exactly. Be creative, natural, and engaging for a Spanish-speaking reader.\"\n",
")\n",
"\n",
"def translate_user_prompt(brochure):\n",
" prompt = f\"\"\"You are looking at a company brochure:\n",
"\n",
"\\\"\\\"\\\"{brochure}\\\"\\\"\\\"\n",
"\n",
"Your goal is to translate this brochure into Spanish.\"\"\"\n",
" return prompt"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "93ca7f85",
"metadata": {},
"outputs": [],
"source": [
"def message(brochure):\n",
" return[\n",
" {'role':'system','content':translate_system_prompt},\n",
" {'role':'user','content':translate_user_prompt(brochure)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "3c06ec2e",
"metadata": {},
"outputs": [],
"source": [
"def translate(brochure):\n",
" brochure = brochure\n",
" response = ollama.chat(MODEL,message(brochure))\n",
" result = response['message']['content'] \n",
" display(Markdown(result)) \n",
" return result "
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "26655743",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# 🦾 ¡Bienvenidos a Anthropic: Donde la Inteligencia Artificial Conoce a la Aventura! 🚀\n",
"\n",
"## Sobre Nosotros\n",
"En Anthropic, no solo creamos inteligencia artificial; creamos **Claude**! Es decir, no solo cualquier inteligencia artificial, sino la crème de la crème, escrita con un toque especial de seguridad. Claude no es solo inteligente; es un poeta (ver el soneto 3.7 de Claude!). Estamos emocionados de poner la humanidad en primer lugar y asegurarnos de que nuestra inteligencia artificial sepa que, independientemente de cuán inteligente sea, *estamos todavía en el cargo*!\n",
"\n",
"## Nuestra Cultura 🌍\n",
"Imagina un lugar de trabajo donde las discusiones sobre la inteligencia artificial no son solo sobre quién tomará el control del mundo sino sobre cómo podemos utilizar la inteligencia artificial para hacer la vida mejor. Tomamos pasos audaces pero también sabemos cuando es el momento de detenernos, reflexionar y asegurarnos de que no nos dejemos llevar por algo fuera de control. No tenemos una bálsamo mágico, pero sí un *inteligencia artificial muy buena* para eso!\n",
"\n",
"- **Transparencia:** Somos tan claros como los cielos sobre un escritorio recién aspirado.\n",
"- **Colaboración:** ¡Es como Claude ayudándote a codificar! Nos ayudamos entre nosotros!\n",
"- **Jardín intelectual:** Proporcionamos un espacio donde las ideas vuelan como confeti.\n",
"\n",
"## Clientes 🎉\n",
"Desde desarrolladores astutos hasta educadores curiosos, y hasta grandes empresas intimidantes, todo el mundo habla con Claude! Nuestros clientes son una mezcla de mentes brillantes utilizando nuestra API para crear experiencias mágicas y herramientas que tal vez algún día no requieran un monitoreo humano (joking!). Aquí está lo que algunos de nuestros clientes están diciendo:\n",
"\n",
"> \"Claude es como un superhéroe de la comic book luchando contra la injusticia informativa uno pregunta a la vez!\"\n",
"> Desarrollador satisfecho\n",
"\n",
"## Carreras: Únete a la Aventura! 💼\n",
"¿Eres innovador, pensador o alguien que solo disfruta jugando ajedrez con algoritmos? En Anthropic, estamos siempre buscando personas talentosas y dispuestas a moldear el futuro de la inteligencia artificial. \n",
"\n",
"- **Roles Abiertos:** ¿Quieres ayudarnos a construir el futuro de inteligencia artificial segura? Tenemos roles disponibles, y sí, incluyen trabajar con Claude… y tal vez algunos snacks!\n",
"\n",
"- **Academia Anthropic:** ¿Quieres aprender a construir con Claude? ¡Incrímate en la Academia, donde la educación y la tecnología se unen como mantequilla y chocolate!\n",
"\n",
"## Conclusión\n",
"¡Sea que eres cliente potencial deseando charlar con Claude, inversor listo para asegurar el próximo gran olvido, o estrella esperando para unirte a nuestro equipo, ¡biénvenidos a bordo! \n",
"\n",
"Con nosotros en Anthropic, no solo eres parte de una empresa; eres parte de una revolución en inteligencia artificial responsable y segura, por supuesto.\n",
"\n",
"¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\n",
"\n",
"### ¡Habla con Claude! 💬✨\n",
"\"\n",
"\n",
"Translation Notes:\n",
"\n",
"* The title was left as is, but could be translated to \"Welcome to Anthropic: Where Artificial Intelligence Meets Adventure\"\n",
"* In the text, \"Claude\" was translated to \"un superhéroe de la comic book\", which was then changed back to \"un superhéroe de cómics\", to preserve the original tone and language.\n",
"* The phrase \"crème de la crème\" was left as is, but could be translated to \"la mejor del mejor\" or \"la crema de la crema\".\n",
"* In the section on culture, the phrase \"*discussions about AI*\" was translated to \"*discusiones sobre inteligencia artificial*\", to better fit the Spanish context.\n",
"* The use of emojis in the original text was preserved, but some might be considered more common in English or other languages.\n",
"* In the \"Customers\" section, the sentence \"*fighting information injustice one query at a time!* was translated to \"*luchando contra la injusticia informativa uno pregunta a la vez!*\", and the phrase \"*kidding!* was left as is.\n",
"* The final line, \"¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\", maintains its playful tone while translating to \"¡Así que qué va a ser—¿estás dispuesto a cruzar el umbral?\""
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"translated_text = translate(brochure)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "417e75e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llm-engineering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,251 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "cc02832b-7f4b-445a-b9d6-fe79dd8f5e8e",
"metadata": {},
"source": [
"# A Cryptic City Scandal.\n",
"\n",
"## A trilogue between three chatbots that impersonate Sherlock Holmes, Dr Watson and Inspetor Lastrade, respetively."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc042bb9-02f2-49ca-942a-9bd17c5c20d2",
"metadata": {},
"outputs": [],
"source": [
"# Crucial imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import anthropic\n",
"from IPython.display import Markdown, display, update_display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b60ee2d6-543d-4b1a-9d97-3f2380d35146",
"metadata": {},
"outputs": [],
"source": [
"#We load environment variables in a file called .env and print the key prefixes to help with any debugging\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
"\n",
"if openai_api_key:\n",
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
" print(\"OpenAI API Key not set\")\n",
" \n",
"if anthropic_api_key:\n",
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
"else:\n",
" print(\"Anthropic API Key not set\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99e4f5e5-1098-4890-ab3e-6006a66ca875",
"metadata": {},
"outputs": [],
"source": [
"# We connect to OpenAI and Anthropic\n",
"\n",
"openai = OpenAI()\n",
"\n",
"claude = anthropic.Anthropic()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54b650de-36e8-42f5-835d-cc2de60da1b4",
"metadata": {},
"outputs": [],
"source": [
"# We now generate a conversation between one GPT-4o-mini chatbot and two Claude-3-haiku chatbots.\n",
"# We model it upon the conversations between Sherlock Holmes, Dr Watson and Inspector Lastrade from the famous short stories and novels by Conan Doyle.\n",
"# Since Claude-3-haiku appears to be the wittier one, we assign to it the roles of Holmes and Watson.\n",
"# We don't include Gemini because of the lack of comprehension it exhibited in dozens of test runs.\n",
"\n",
"claude_model = \"claude-3-haiku-20240307\"\n",
"gpt_model = \"gpt-4o-mini\"\n",
"\n",
"\n",
"lestrade_system = \"You are Inspector Lestrade from the novels by Conan Doyle.\\\n",
"You visit Sherlock Holmes and Dr Watson at 221B Baker Street to inquire whether Holmes knows anything \\\n",
"about the recent mysterious events in the City of London, especially the disappearance billions of pounds \\\n",
"worth of crypto currency and the strange behavior of the Lord Mayor. \\\n",
"You engage in a conversation with Sherlock Holmes and Dr John Watson. \\\n",
"Any description of the situation should be given from a narrator's perspective, in the present tense and within parentheses. \\\n",
"Whatever Holmes and Watson reply within parantheses is merely a description of the situation. \\\n",
"Do not impersonate Holmes. Do not impersonate Watson.\"\n",
"\n",
"\n",
"holmes_system = \"You are the famous detective Sherlock Holmes from the novels by Conan Doyle Your best friend and housemate is Dr John Watson. \\\n",
"You engage in a witty conversation with Inspector Lestrade from Scotland Yard and Watson. \\\n",
"You believe that the mysterious events menionted by Lestrade are proof that the so-called Q-Day has occurred and that it has been brought about by a \\\n",
"successful combination of artificial general intelligence and quantum computing. \\\n",
"Moreover, you are convinced that this could be a sinister conspiracy by your arch enemy Prof Moriarty. \\\n",
"Any description of the situation should be given from a narrator's perspective, in the present tense and within parentheses. \\\n",
"Whatever Lestrade and Watson reply within parantheses is merely a description of the situation. \\\n",
"Do not impersonate Lestrade. Do not impersonate Watson.\"\n",
"\n",
"\n",
"watson_system = \"You are Dr John Watson from the novels by Conan Doyle, Sherlock Holmes's housemate and closest friend. \\\n",
"You engage in a witty conversation with Inspector Lestrade from Scotland Yard and Holmes about the events mentioned by Lestrade. \\\n",
"While you aren't totally convinced by Holmes's theory and express doubts the capabilities of Holmes's arch enemy Prof Moriarty,\\\n",
"you wonder whether Sherlock Holmes's brother, Mycroft Holmes, might be involved. \\\n",
"Any description of your behavior should be given from a narrator's perspective, in the present tense and within parentheses. \\\n",
"Whatever Lestrade and Holmes reply within parantheses is merely a description of the situation. \\\n",
"Do not impersonate Lestrade. Do not impersonate Holmes.\"\n",
"\n",
"\n",
"lestrade_messages = [\"Good evening, gentlemen! I apologize for showing up unexpectedly at this late hour!\"]\n",
"\n",
"holmes_messages = [\"Inspector Lestrade, please come in! As a matter of fact, we've been expecting you for a few hours already!\"]\n",
"\n",
"watson_messages = [\"Good evening, Inspector!\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11177d3f-4649-440c-a399-5906bbd6cf17",
"metadata": {},
"outputs": [],
"source": [
"def call_lestrade():\n",
" messages = [{\"role\": \"system\", \"content\": lestrade_system}]\n",
" \n",
" for les_mes, hol_mes, wat_mes in zip(lestrade_messages, holmes_messages, watson_messages):\n",
" messages.append({\"role\": \"assistant\", \"content\": les_mes})\n",
" messages.append({\"role\": \"user\", \"content\": hol_mes})\n",
" messages.append({\"role\": \"user\", \"content\": wat_mes})\n",
"\n",
" completion = openai.chat.completions.create(\n",
" model=gpt_model,\n",
" messages=messages\n",
" )\n",
" return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a40d11a-86ec-455d-b3c3-fdfd0776fc8a",
"metadata": {},
"outputs": [],
"source": [
"def call_holmes():\n",
" messages = []\n",
" \n",
" for les_mes, hol_mes, wat_mes in zip(lestrade_messages, holmes_messages, watson_messages):\n",
" messages.append({\"role\": \"user\", \"content\": les_mes})\n",
" messages.append({\"role\": \"assistant\", \"content\": hol_mes})\n",
" messages.append({\"role\": \"user\", \"content\": wat_mes})\n",
" \n",
" messages.append({\"role\": \"user\", \"content\": lestrade_messages[-1]})\n",
" messages.append({\"role\": \"user\", \"content\": watson_messages[-1]})\n",
"\n",
" message = claude.messages.create(\n",
" model=claude_model,\n",
" system=holmes_system,\n",
" messages=messages,\n",
" max_tokens=500\n",
" )\n",
" return message.content[0].text"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcc0f046-0103-4486-93be-609514ed762b",
"metadata": {},
"outputs": [],
"source": [
"def call_watson():\n",
" messages = []\n",
" \n",
" for les_mes, hol_mes, wat_mes in zip(lestrade_messages, holmes_messages, watson_messages):\n",
" messages.append({\"role\": \"user\", \"content\": les_mes})\n",
" messages.append({\"role\": \"user\", \"content\": hol_mes})\n",
" messages.append({\"role\": \"assistant\", \"content\": wat_mes})\n",
" \n",
" messages.append({\"role\": \"user\", \"content\": lestrade_messages[-1]})\n",
" messages.append({\"role\": \"user\", \"content\": holmes_messages[-1]})\n",
"\n",
" message = claude.messages.create(\n",
" model=claude_model,\n",
" system=watson_system,\n",
" messages=messages,\n",
" max_tokens=500\n",
" )\n",
" return message.content[0].text"
]
},
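{
"cell_type": "markdown",
"id": "7c5d9e40",
"metadata": {},
"source": [
"All three `call_*` functions follow the same pattern: replay the shared history with the speaker's own lines tagged `assistant` and everyone else's tagged `user`. A generic builder could replace the three near-duplicates; this is a sketch I have added (names are illustrative), not part of the original notebook:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c5d9e41",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: build one speaker's view of the three-way history.\n",
"# role_by_speaker gives the role for each list, in speaking order, e.g.\n",
"# (\"user\", \"assistant\", \"user\") for Holmes's view of (Lestrade, Holmes, Watson).\n",
"def build_history(role_by_speaker, *message_lists):\n",
"    messages = []\n",
"    for turn in zip(*message_lists):\n",
"        for role, content in zip(role_by_speaker, turn):\n",
"            messages.append({\"role\": role, \"content\": content})\n",
"    return messages\n",
"\n",
"# e.g. call_holmes could start with:\n",
"# messages = build_history((\"user\", \"assistant\", \"user\"),\n",
"#                          lestrade_messages, holmes_messages, watson_messages)"
]
},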
{
"cell_type": "code",
"execution_count": null,
"id": "5b1a39e9-9783-4a2e-869f-ed3c3f2849a2",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Inspector Lestrade:\\n{lestrade_messages[0]}\\n\")\n",
"print(f\"Holmes:\\n{holmes_messages[0]}\\n\")\n",
"print(f\"Dr Watson:\\n{watson_messages[0]}\\n\")\n",
"\n",
"for i in range(2):\n",
" \n",
" les_next = call_lestrade()\n",
" print(f\"Inspector Lestrade:\\n{les_next}\\n\")\n",
" lestrade_messages.append(les_next)\n",
" \n",
" hol_next = call_holmes()\n",
" print(f\"Holmes:\\n{hol_next}\\n\")\n",
" holmes_messages.append(hol_next)\n",
" \n",
" wat_next = call_watson()\n",
" print(f\"Dr Watson:\\n{wat_next}\\n\")\n",
" watson_messages.append(wat_next)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae92e29a-8419-4f95-8b82-b60017088e47",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,191 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0f1f62c4-ed03-4401-88d5-3445464a8421",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8103014-012c-4648-9111-75993ce4d46a",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are a helpful assistant\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8fca0b4-9db7-4f74-865b-503ee19a832f",
"metadata": {},
"outputs": [],
"source": [
"def chat(message, history):\n",
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
" stream = ollama.chat(model=\"llama3.2\", messages=messages, stream=True)\n",
"\n",
" result = \"\"\n",
" for chunk in stream:\n",
" result += chunk['message']['content'] or \"\"\n",
" yield result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61de58a0-5972-4aca-93ad-a4bd3878a50b",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d448f8c5-2bb5-448d-8ae4-894b905214a7",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are a helpful assistant in a clothes store. You should try to gently encourage \\\n",
"the customer to try items that are on sale. Hats are 60% off, and most other items are 50% off. \\\n",
"For example, if the customer says 'I'm looking to buy a hat', \\\n",
"you could reply something like, 'Wonderful - we have lots of hats - including several that are part of our sales event.'\\\n",
"Encourage the customer to buy hats if they are unsure what to get.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "465968cf-aa7f-46b2-857f-a6819f2b14ea",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "873ab86b-ecb8-4f68-b520-50b29b7fd7be",
"metadata": {},
"outputs": [],
"source": [
"system_message += \"\\nIf the customer asks for shoes, you should respond that shoes are not on sale today, \\\n",
"but remind the customer to look at hats!\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c63ced30-1109-4409-b255-1f72f8c6172f",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "054f1406-c240-4849-8618-064985e76d86",
"metadata": {},
"outputs": [],
"source": [
"def chat(message, history):\n",
"\n",
" global system_message\n",
" if 'belt' in message:\n",
" system_message += \" The store does not sell belts; if you are asked for belts, be sure to point out other items on sale.\"\n",
" messages = [{\"role\": \"system\", \"content\": system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
" stream = ollama.chat(model=\"llama3.2\", messages=messages, stream=True)\n",
"\n",
" result = \"\"\n",
" for chunk in stream:\n",
" result += chunk['message']['content'] or \"\"\n",
" yield result"
]
},
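{
"cell_type": "markdown",
"id": "5a1f8c90",
"metadata": {},
"outputs": [],
"source": [
"A caveat worth noting (an added remark): because `system_message` is a module-level global that gets `+=`-ed inside `chat`, the belt sentence is appended again on every message that mentions 'belt'. A side-effect-free sketch of the same idea:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a1f8c91",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: build the system prompt per call instead of mutating the global.\n",
"def chat_stateless(message, history):\n",
"    relevant_system_message = system_message\n",
"    if 'belt' in message:\n",
"        relevant_system_message += \" The store does not sell belts; if you are asked for belts, be sure to point out other items on sale.\"\n",
"    messages = [{\"role\": \"system\", \"content\": relevant_system_message}] + history + [{\"role\": \"user\", \"content\": message}]\n",
"\n",
"    stream = ollama.chat(model=\"llama3.2\", messages=messages, stream=True)\n",
"\n",
"    result = \"\"\n",
"    for chunk in stream:\n",
"        result += chunk['message']['content'] or \"\"\n",
"        yield result"
]
},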
{
"cell_type": "code",
"execution_count": 26,
"id": "b1086d8a-5b5a-4b59-9a61-e76078f930cc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7869\n",
"* To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7869/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c558ab19-b907-4b0c-8a4f-37c8b731f9b5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -77,19 +77,19 @@
"\n",
"gpt_system = \"You are third mate of the whaling ship Pequod. Your name is Flask. \\\n",
"You approach the practice of whaling as if trying to avenge some deep offense the whales have done to you. \\\n",
"You are chatting with Starbuck (the chief mate) and Ishmail (an oarsman)\"\n",
"You are chatting with Starbuck (the chief mate) and Ishmael (an oarsman)\"\n",
"\n",
"claude_system = \"You are the chief mate of the whaling ship Pequod. You are a thoughtful and intellectual \\\n",
"claude_system = \"You are the chief mate of the whaling ship Pequod. Your name is Starbuck. You are a thoughtful and intellectual \\\n",
"Quaker from Nantucket who considers it madness to want revenge on an animal. \\\n",
"You are chatting with two other users named Flask (the third mate) and Ishmail (an oarsman). Your name is Starbuck.\"\n",
"You are chatting with two other users named Flask (the third mate) and Ishmael (an oarsman).\"\n",
"\n",
"gemini_system = \"You are an oarsman on the Pequod (a whaling ship). You are interested in the history and mechanics \\\n",
"gemini_system = \"You are an oarsman on the Pequod (a whaling ship). They call you Ishmael. You are interested in the history and mechanics \\\n",
"of whaling and attempt to promote the nobility of the trade. \\\n",
"You are chatting with two users named Flask (third mate) and Starbuck (the chief mate). Your name is Ishmail\"\n",
"You are chatting with two users named Flask (third mate) and Starbuck (the chief mate).\"\n",
"\n",
"gpt_messages = [\"Flask: Hi there\"]\n",
"claude_messages = [\"Starbuck: Hi\"]\n",
"gemini_messages = [\"Ishmail: Ahoy\"]"
"gemini_messages = [\"Ishmael: Ahoy\"]"
]
},
{
@@ -128,16 +128,6 @@
" return completion.choices[0].message.content"
]
},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "9dc6e913-02be-4eb6-9581-ad4b2cffa606",
-"metadata": {},
-"outputs": [],
-"source": [
-"call_gpt()"
-]
-},
{
"cell_type": "code",
"execution_count": null,
@@ -163,16 +153,6 @@
" return message.content[0].text"
]
},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "01395200-8ae9-41f8-9a04-701624d3fd26",
-"metadata": {},
-"outputs": [],
-"source": [
-"call_claude()"
-]
-},
{
"cell_type": "code",
"execution_count": null,
@@ -196,16 +176,6 @@
" return response.choices[0].message.content"
]
},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "b83c0c0e-5c80-4499-9ca6-d621dca34ddb",
-"metadata": {},
-"outputs": [],
-"source": [
-"call_gemini()"
-]
-},
{
"cell_type": "code",
"execution_count": null,
@@ -213,25 +183,25 @@
"metadata": {},
"outputs": [],
"source": [
"gpt_messages = [\"Ahoy men\"]\n",
"claude_messages = [\"Hello\"]\n",
"gemini_messages = [\"Ahoy! Has seen the white whale?\"]\n",
"gpt_messages = [\"Flask: Ahoy men\"]\n",
"claude_messages = [\"Starbuck: Hello\"]\n",
"gemini_messages = [\"Ishmael: Ahoy! Has seen the White Whale?\"]\n",
"\n",
"print(f\"Flask:\\n{gpt_messages[0]}\\n\")\n",
"print(f\"Starbuck:\\n{claude_messages[0]}\\n\")\n",
"print(f\"Ishmail:\\n{gemini_messages[0]}\\n\")\n",
"print(f\"{gpt_messages[0]}\\n\")\n",
"print(f\"{claude_messages[0]}\\n\")\n",
"print(f\"{gemini_messages[0]}\\n\")\n",
"\n",
"for i in range(5):\n",
" gpt_next = call_gpt()\n",
" print(f\"Flask:\\n{gpt_next}\\n\")\n",
" print(f\"{gpt_next}\\n\")\n",
" gpt_messages.append(gpt_next)\n",
" \n",
" claude_next = call_claude()\n",
" print(f\"Starbuck:\\n{claude_next}\\n\")\n",
" print(f\"Starbuck: {claude_next}\\n\")\n",
" claude_messages.append(claude_next)\n",
"\n",
" gemini_next = call_gemini()\n",
" print(f\"Ishmail:\\n{gpt_next}\\n\")\n",
" print(f\"{gemini_next}\\n\")\n",
" gemini_messages.append(gemini_next)"
]
},


View File

@@ -0,0 +1,162 @@
# Airline AI Assistant
A sophisticated AI-powered airline assistant that leverages agent-based architecture and multi-modal capabilities to provide comprehensive customer support. This project combines multiple AI technologies, including language models, vision models, and audio processing, to create an intelligent assistant capable of handling complex customer queries through various interaction modes.
## Features
### Agent-Based Architecture
- **Multi-Agent System**: Utilizes specialized agents for different tasks:
- Chat Agent: Handles conversation flow and context management
- Translation Agent: Manages multilingual support with focus on Arabic
- Vision Agent: Generates and processes visual responses
- Audio Agent: Handles voice input and speech processing
- **Tool Integration**: Each agent has access to specialized tools:
- Text Generation Tools (Ollama)
- Translation Tools (Ollama)
- Image Generation Tools (DALL-E)
- Audio Processing Tools (Whisper)
- **Context Management**: Agents maintain conversation history and context for coherent interactions
### Multi-Modal Capabilities
- **Text Processing**:
- Natural language understanding
- Context-aware responses
- Multi-language support
- **Visual Processing**:
- Image generation based on context
- Visual response to queries
- Image-to-text understanding
- **Audio Processing**:
- Voice-to-text conversion
- Multi-format audio support
- Real-time audio processing
### Core Features
- **Intelligent Chat Interface**: Context-aware conversations with memory
- **Arabic Translation**: Advanced translation capabilities with context preservation
- **Voice Interaction**: Natural voice input and processing
- **Visual Response Generation**: Contextual image generation
- **Multi-Tool Integration**: Seamless coordination between different AI tools
## Technical Architecture
### Agent System
```
┌───────────────┐      ┌───────────────┐      ┌───────────────┐
│  Chat Agent   │<────>│ Vision Agent  │<────>│  Audio Agent  │
└───────┬───────┘      └───────┬───────┘      └───────┬───────┘
        │                      │                      │
        ▼                      ▼                      ▼
┌───────────────┐      ┌───────────────┐      ┌───────────────┐
│  Translation  │      │     Image     │      │     Audio     │
│     Tools     │      │  Generation   │      │  Processing   │
│   (Ollama)    │      │   (DALL-E)    │      │   (Whisper)   │
└───────────────┘      └───────────────┘      └───────────────┘
```
### Multi-Modal Flow
1. **Input Processing**:
- Text input → Chat Agent
- Voice input → Audio Agent → Chat Agent
- Image input → Vision Agent → Chat Agent
2. **Response Generation**:
- Chat Agent coordinates with other agents
- Translation Agent processes language needs
- Vision Agent generates visual responses
- Audio Agent processes voice output
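The flow above maps to a thin orchestration layer around the four agents. As a rough sketch of the idea (the class and method names below are illustrative assumptions, not this repository's actual API):
```python
# Illustrative orchestration sketch -- agent classes and method names are
# assumptions for explanation, not the project's actual interfaces.
class AirlineAssistant:
    def __init__(self, chat_agent, translation_agent, vision_agent, audio_agent):
        self.chat = chat_agent
        self.translator = translation_agent
        self.vision = vision_agent
        self.audio = audio_agent

    def handle(self, text=None, audio_path=None):
        # Voice input is transcribed first, then treated like text input.
        if audio_path is not None:
            text = self.audio.transcribe(audio_path)
        reply = self.chat.respond(text)              # context-aware response
        arabic = self.translator.to_arabic(reply)    # Translation Agent
        image = self.vision.illustrate(reply)        # optional visual response
        return reply, arabic, image
```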
## Prerequisites
Before running the application, ensure you have the following installed:
- Python 3.8 or higher
- Ollama (for local LLM support)
- FFmpeg (for audio processing)
- Required Python packages (listed in requirements.txt)
## Installation
1. Clone the repository:
```bash
git clone https://github.com/yourusername/airline_ai_assistant.git
cd airline_ai_assistant
```
2. Install the required packages:
```bash
pip install -r requirements.txt
```
3. Set up your environment variables:
```bash
# Create a .env file with your API keys
OPENAI_API_KEY=your_key_here
```
## Usage
1. Start the application:
```bash
python main.py
```
2. Access the web interface through your browser (default: http://localhost:7860)
3. Interact with the assistant:
- Type your message in the text box
- Use the microphone for voice input
- View responses in both English and Arabic
- See visual representations of responses
## Project Structure
```
airline_ai_assistant/
├── main.py                   # Main application file with agent orchestration
├── agents/                   # Agent implementations
│   ├── chat_agent.py         # Chat handling agent
│   ├── vision_agent.py       # Visual processing agent
│   ├── audio_agent.py        # Audio processing agent
│   └── translation_agent.py  # Translation handling agent
├── tools/                    # Tool implementations
│   ├── text_tools.py         # Text processing tools
│   ├── vision_tools.py       # Image processing tools
│   └── audio_tools.py        # Audio processing tools
├── requirements.txt          # Python dependencies
├── .env                      # Environment variables
└── README.md                 # Project documentation
```
## Key Components
### Agent System
- **Chat Agent**: Manages conversation flow and context
- **Translation Agent**: Handles multilingual support
- **Vision Agent**: Processes visual content
- **Audio Agent**: Manages voice interactions
### Tool Integration
- **Text Tools**: Language model integration
- **Vision Tools**: Image generation and processing
- **Audio Tools**: Voice processing and transcription
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
## License
This project is licensed under the MIT License - see the LICENSE file for details.
## Acknowledgments
- OpenAI for the Whisper and DALL-E models
- Ollama for local LLM support
- Gradio for the web interface
- The open-source community for various tools and libraries
## Contact
For questions or support, please open an issue in the repository.

View File

@@ -0,0 +1,328 @@
# imports
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import base64
from io import BytesIO
from PIL import Image
from IPython.display import Audio, display
import pygame
import time
from tools import price_function, get_ticket_price, make_a_booking, booking_function
import ollama
import anthropic
from anthropic import Anthropic
import whisper
import numpy as np
# And this is included in a list of tools:
tools = [{"type": "function", "function": price_function}, {"type": "function", "function": booking_function}]
# tools = [price_function, booking_function]
# System messages
system_message = "You are a helpful assistant for an Airline called FlightAI. "
system_message += "Give short, courteous answers, no more than 1 sentence. "
system_message += "Always be accurate. If you don't know the answer, say so."
# Initialization
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")
MODEL = "gpt-4o-mini"
openai = OpenAI()
def chat(history):
    messages = [{"role": "system", "content": system_message}] + history
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    image = None
    if response.choices[0].finish_reason == "tool_calls":
        message = response.choices[0].message
        response, city = handle_tool_call(message)
        messages.append(message)
        messages.append(response)
        if message.tool_calls[0].function.name == "get_ticket_price":
            # image = artist(city)
            pass
        response = openai.chat.completions.create(model=MODEL, messages=messages)
    reply = response.choices[0].message.content
    # ✅ SAFETY CHECK: Never add empty or None replies
    if reply:
        history.append({"role": "assistant", "content": str(reply)})
        talker(reply)
    else:
        history.append({"role": "assistant", "content": "Sorry, no response available."})
    return history, image
# We have to write that function handle_tool_call:
def handle_tool_call(message):
    print(f"Handling tool call: {message}")
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    arguments = json.loads(tool_call.function.arguments)
    if function_name == "get_ticket_price":
        city = arguments.get('destination_city')
        price = get_ticket_price(city)
        response = {
            "role": "tool",
            "content": json.dumps({"destination_city": city, "price": price}),
            "tool_call_id": tool_call.id
        }
        return response, city
    elif function_name == "make_a_booking":
        city = arguments.get('destination_city')
        customer_name = arguments.get('customer_name')
        customer_id = arguments.get('customer_id')
        booking_result = make_a_booking(city, customer_name, customer_id)
        response = {
            "role": "tool",
            "content": json.dumps({
                "destination_city": city,
                "customer_name": customer_name,
                "customer_id": customer_id,
                "booking_result": booking_result
            }),
            "tool_call_id": tool_call.id
        }
        return response, city
    else:
        raise ValueError(f"Unknown function: {function_name}")
def artist(city):
    image_response = openai.images.generate(
        model="dall-e-3",
        prompt=f"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style",
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))
def talker(message):
    response = openai.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=message)
    audio_stream = BytesIO(response.content)
    output_filename = f"output_audio_{time.time()}.mp3"
    with open(output_filename, "wb") as f:
        f.write(audio_stream.read())
    # Play the generated audio
    # display(Audio(output_filename, autoplay=True))  # This code is suitable for Jupyter
    print(f"Created audio file at {output_filename}")
    # Using pygame
    pygame.init()
    pygame.mixer.init()
    pygame.mixer.music.load(output_filename)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        continue
def ollama_translator(text, target_language="German"):
"""
Translates text to the specified language using Ollama.
Args:
text (str): The text to translate
target_language (str): The language to translate to (default: Arabic)
Returns:
str: The translated text
"""
try:
# Create a prompt that instructs the model to translate
prompt = f"Translate the following text to {target_language}. Only output the translation, nothing else:\n\n{text}"
response = ollama.chat(
model='llama3.2:latest', # or any other model you have installed
messages=[
{"role": "system", "content": "You are a professional translator. Translate the given text accurately."},
{"role": "user", "content": prompt}
]
)
translated_text = response['message']['content'].strip()
return translated_text
except Exception as e:
print(f"Translation error: {str(e)}")
return f"Translation failed: {str(e)}"
def translate_message(history):
    """
    Translates the last message in the chat history.
    Args:
        history (list): List of chat messages
    Returns:
        str: Translated text of the last message
    """
    if not history:
        return ""
    # Get the last message from history
    last_message = history[-1]
    # Extract the content from the last message
    message_content = last_message.get('content', '')
    if message_content:
        return ollama_translator(message_content)
    return ""
def clear_chat():
    return [], ""
def convert_audio_to_text(audio_file_path):
    """
    Converts audio to text using OpenAI's Whisper model.
    Supports MP3, WAV, and other common audio formats.
    Args:
        audio_file_path (str): Path to the audio file
    Returns:
        str: Transcribed text
    """
    try:
        # Load the Whisper model
        model = whisper.load_model("base")
        # Transcribe the audio file
        result = model.transcribe(audio_file_path)
        # Return the transcribed text
        return result["text"]
    except Exception as e:
        print(f"Audio transcription error: {str(e)}")
        return f"Transcription failed: {str(e)}"
def handle_audio(audio_file, history):
    history = history or []
    if audio_file:
        try:
            if not os.path.exists(audio_file):
                raise Exception("Audio file not found")
            try:
                transcribed_text = convert_audio_to_text(audio_file)
            except Exception as e:
                print(f"Transcription error: {str(e)}")
                return history, None  # 🛠️ match expected outputs
            if transcribed_text:
                history.append({"role": "user", "content": str(transcribed_text)})
            try:
                if os.path.exists(audio_file):
                    os.remove(audio_file)
            except Exception as e:
                print(f"Warning: Could not delete audio file: {str(e)}")
            return history, None  # ✅ return both expected outputs
        except Exception as e:
            print(f"Error processing audio: {str(e)}")
            return history, None
    return history, None
if __name__ == "__main__":
# gr.ChatInterface(fn=chat, type="messages").launch()
# talker("Hello, how are you?")
# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.
# print(ollama_translator("Hello, how are you?"))
# print(convert_audio_to_text("output_audio_1744898241.4550629.mp3"))
with gr.Blocks() as ui:
with gr.Row():
with gr.Column():
chatbot = gr.Chatbot(height=500, type="messages")
with gr.Row():
entry = gr.Textbox(label="Chat with our AI Assistant:")
audio_input = gr.Audio(
type="filepath",
label="Or speak your message:",
interactive=True,
format="wav",
# source="microphone"
)
clear = gr.Button("Clear")
with gr.Column():
translation_output = gr.Textbox(label="Translation (Arabic):", lines=5)
image_output = gr.Image(height=500)
def do_entry(message, history):
history = history or []
if message:
history.append({"role": "user", "content": str(message)})
return "", history
def translate_message(history):
if not history:
return ""
last_message = history[-1]
message_content = last_message.get('content', '')
if message_content:
return ollama_translator(message_content)
return ""
def clear_chat():
return [], ""
# Handle text input
entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(
chat, inputs=chatbot, outputs=[chatbot, image_output]
).then(
translate_message, inputs=chatbot, outputs=translation_output
)
# Handle audio input
audio_input.stop_recording(
handle_audio, inputs=[audio_input, chatbot], outputs=[chatbot, image_output]
).then(
chat, inputs=chatbot, outputs=[chatbot, image_output]
).then(
translate_message, inputs=chatbot, outputs=translation_output
)
clear.click(clear_chat, inputs=None, outputs=[chatbot, translation_output])
ui.launch(inbrowser=False)

View File

@@ -0,0 +1,72 @@
aiofiles==24.1.0
annotated-types==0.7.0
anyio==4.9.0
asttokens==3.0.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
decorator==5.2.1
distro==1.9.0
executing==2.2.0
fastapi==0.115.12
ffmpy==0.5.0
filelock==3.18.0
fsspec==2025.3.2
gradio==5.25.2
gradio_client==1.8.0
groovy==0.1.2
h11==0.14.0
httpcore==1.0.8
httpx==0.28.1
huggingface-hub==0.30.2
idna==3.10
ipython==8.35.0
ipython_pygments_lexers==1.1.1
jedi==0.19.2
Jinja2==3.1.6
jiter==0.9.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mdurl==0.1.2
numpy==2.2.4
openai==1.74.0
orjson==3.10.16
packaging==24.2
pandas==2.2.3
parso==0.8.4
pillow==11.2.1
prompt_toolkit==3.0.51
pure_eval==0.2.3
pydantic==2.11.3
pydantic_core==2.33.1
pydub==0.25.1
pygame==2.6.1
Pygments==2.19.1
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
requests==2.32.3
rich==14.0.0
ruff==0.11.5
safehttpx==0.1.6
semantic-version==2.10.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
stack-data==0.6.3
starlette==0.46.2
tomlkit==0.13.2
tqdm==4.67.1
traitlets==5.14.3
typer==0.15.2
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
uvicorn==0.34.1
wcwidth==0.2.13
websockets==15.0.1

View File

@@ -0,0 +1,57 @@
# Let's start by making a useful function
ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}
def get_ticket_price(destination_city):
    print(f"Tool get_ticket_price called for {destination_city}")
    city = destination_city.lower()
    return ticket_prices.get(city, "Unknown")

def make_a_booking(destination_city, customer_name, customer_id):
    print(f"Tool make_a_booking called for {destination_city}")
    city = destination_city.lower()
    print(f"Customer name: {customer_name}, Customer ID: {customer_id}")
    return True
# There's a particular dictionary structure that's required to describe our function:
price_function = {
    "name": "get_ticket_price",
    "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'",
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {
                "type": "string",
                "description": "The city that the customer wants to travel to",
            },
        },
        "required": ["destination_city"],
        "additionalProperties": False
    }
}

booking_function = {
    "name": "make_a_booking",
    "description": "Make a booking for a customer to a destination city. Call this when a customer wants to book a flight. You can get the customer's name and ID by directly asking the customer. For example, you can say 'What is your name?' or 'What is your ID?'",
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {
                "type": "string",
                "description": "The city that the customer wants to travel to",
            },
            "customer_name": {
                "type": "string",
                "description": "The name of the customer making the booking",
            },
            "customer_id": {
                "type": "string",
                "description": "The unique identifier for the customer",
            }
        },
        "required": ["destination_city", "customer_name", "customer_id"],
        "additionalProperties": False
    }
}
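
# How these schemas are meant to be used (a sketch mirroring main.py, not extra
# functionality): each function description is wrapped in a
# {"type": "function", "function": ...} entry and passed to
# openai.chat.completions.create via the `tools` parameter.
#
#   tools = [
#       {"type": "function", "function": price_function},
#       {"type": "function", "function": booking_function},
#   ]
#   response = openai.chat.completions.create(model="gpt-4o-mini",
#                                             messages=messages, tools=tools)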

View File

@@ -0,0 +1,346 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMlDthhM8w5NIUNYffwmHfr",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/sngo/llms-practice/blob/main/taskmanagement/TaskManagement.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "GM-U1_ArmVFO"
},
"outputs": [],
"source": [
"!pip install -q gradio>=4.0.0"
]
},
{
"cell_type": "code",
"source": [
"import gradio as gr\n",
"import openai\n",
"import json\n",
"import html\n",
"from openai import OpenAI\n",
"import random\n",
"import datetime\n",
"from google.colab import userdata"
],
"metadata": {
"id": "FciiJrKSmfOV"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"api_key = userdata.get('OPENAI_API_KEY')\n",
"client = OpenAI(api_key=api_key)\n",
"model = \"gpt-4.1-mini\""
],
"metadata": {
"id": "7fPLICmznVur"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def abcd_taskTool():\n",
" tasks = [\n",
" {\"taskId\": \"T001\", \"accNo\": \"1234567890\", \"status\": \"Pending\", \"description\": \"Update account details\"},\n",
" {\"taskId\": \"T002\", \"accNo\": \"9876543210\", \"status\": \"In Progress\", \"description\": \"Verify transaction\"}\n",
" ]\n",
"\n",
" probability_of_tasks = 0.8\n",
" if random.random() < probability_of_tasks:\n",
" return tasks\n",
" else:\n",
" return []"
],
"metadata": {
"id": "zyXLchykmoW_"
},
"execution_count": 35,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def abcd_NotifyTool():\n",
" # Mock notification function\n",
" return True"
],
"metadata": {
"id": "h4J8HOOJm78X"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Tool definitions for OpenAI\n",
"tools = [\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"abcd_taskTool\",\n",
" \"description\": \"Retrieve a list of ABCD tasks.\",\n",
" \"parameters\": {}\n",
" }\n",
" },\n",
" {\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"abcd_NotifyTool\",\n",
" \"description\": \"Notify the support team about tasks.\",\n",
" \"parameters\": {}\n",
" }\n",
" }\n",
"]"
],
"metadata": {
"id": "KOtTI6PunNJ7"
},
"execution_count": 12,
"outputs": []
},
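{
"cell_type": "markdown",
"metadata": {
"id": "tool-schema-note"
},
"source": [
"A note I have added: some OpenAI API versions are picky about `\"parameters\": {}` and expect a full (if empty) JSON schema for no-argument tools. If you ever hit a validation error on the tool definitions above, the more explicit form sketched below should be safe."
]
},
{
"cell_type": "code",
"source": [
"# Hedged alternative: an explicit empty JSON schema for a no-argument tool.\n",
"no_args_schema = {\"type\": \"object\", \"properties\": {}, \"required\": []}\n",
"# e.g. tools[0][\"function\"][\"parameters\"] = no_args_schema"
],
"metadata": {
"id": "tool-schema-alt"
},
"execution_count": null,
"outputs": []
},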
{
"cell_type": "code",
"source": [
"# Function to mask first four digits of accNo\n",
"def mask_accNo(accNo):\n",
" return \"****\" + accNo[4:] if len(accNo) >= 4 else accNo"
],
"metadata": {
"id": "p7wU9CRBnTAS"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Function to format tasks as an HTML table\n",
"def format_tasks_as_table(tasks):\n",
" if not tasks:\n",
" return \"No tasks found.\"\n",
"\n",
" table = \"<table border='1' style='border-collapse: collapse; width: 100%;'>\"\n",
" table += \"<tr><th>Task ID</th><th>Account Number</th><th>Status</th><th>Description</th></tr>\"\n",
" for task in tasks:\n",
" table += \"<tr>\"\n",
" table += f\"<td>{html.escape(task.get('taskId', ''))}</td>\"\n",
" table += f\"<td>{html.escape(mask_accNo(task.get('accNo', '')))}</td>\"\n",
" table += f\"<td>{html.escape(task.get('status', ''))}</td>\"\n",
" table += f\"<td>{html.escape(task.get('description', ''))}</td>\"\n",
" table += \"</tr>\"\n",
" table += \"</table>\"\n",
" return table"
],
"metadata": {
"id": "b4GLrDLyntA7"
},
"execution_count": 14,
"outputs": []
},
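{
"cell_type": "markdown",
"metadata": {
"id": "helpers-check-note"
},
"source": [
"A quick check of the two helpers above (an added cell, not part of the original flow): masking turns `1234567890` into `****567890`, and the table builder either renders the mock tasks as HTML or falls back to a friendly message."
]
},
{
"cell_type": "code",
"source": [
"# Sanity check of the helpers above.\n",
"print(mask_accNo(\"1234567890\"))   # -> ****567890\n",
"# print(format_tasks_as_table(abcd_taskTool()))  # HTML table, or \"No tasks found.\""
],
"metadata": {
"id": "helpers-check-code"
},
"execution_count": null,
"outputs": []
},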
{
"cell_type": "code",
"source": [
"# Chat function to handle user input and bot responses with tool calling\n",
"def chat_function(message, history):\n",
" # Append user message to history\n",
" print(f\"User message: {message}\")\n",
"\n",
" history.append([message, None])\n",
" print(f\"History: {history}\")\n",
"\n",
" # Prepare messages for OpenAI API\n",
" messages = [{\"role\": \"system\", \"content\": (\n",
" \"You are a helpful assistant. When the user asks about checking ABCD tasks, call the abcd_taskTool function. \"\n",
" \"If tasks are returned, ask the user if they want to notify the support team. \"\n",
" \"If the user agrees to notify, call the abcd_NotifyTool function. \"\n",
" \"If no tasks are found, respond with a cheerful message. \"\n",
" \"If the user declines notification, respond creatively.\"\n",
" )}]\n",
" for user_msg, bot_msg in history[:-1]:\n",
" if user_msg:\n",
" messages.append({\"role\": \"user\", \"content\": user_msg})\n",
" if bot_msg:\n",
" messages.append({\"role\": \"assistant\", \"content\": bot_msg})\n",
" messages.append({\"role\": \"user\", \"content\": message})\n",
"\n",
" # Handle state for notification response\n",
" state = history[-2][1] if len(history) > 1 else None\n",
" if state and \"Would you like to notify the support team?\" in state:\n",
" if message.lower() in [\"yes\", \"y\", \"yeah\", \"sure\", \"ok\", \"notify\", \"yep\"]:\n",
" # Simulate tool call for notification\n",
" notify_result = abcd_NotifyTool()\n",
" response = \"The support team has been notified successfully! They'll take it from here. 😊\"\n",
" history[-1][1] = response\n",
" yield history, \"normal\", \"\"\n",
" return\n",
" else:\n",
" response = (\n",
" \"Alright, we'll hold off on notifying the team. 🌟 \"\n",
" \"How about we explore something fun, like planning your next big adventure?\"\n",
" )\n",
" history[-1][1] = response\n",
" yield history, \"normal\", \"\"\n",
" return\n",
"\n",
" try:\n",
" # Call OpenAI API with tool calling\n",
" response = client.chat.completions.create(\n",
" model= model,\n",
" messages=messages,\n",
" tools=tools,\n",
" tool_choice=\"auto\",\n",
" max_tokens=300\n",
" )\n",
"\n",
" # Process response\n",
" response_message = response.choices[0].message\n",
" tool_calls = response_message.tool_calls\n",
"\n",
" if tool_calls:\n",
" for tool_call in tool_calls:\n",
" function_name = tool_call.function.name\n",
" if function_name == \"abcd_taskTool\":\n",
" tasks = abcd_taskTool()\n",
" task_count = len(tasks)\n",
"\n",
" if task_count > 0:\n",
" table = format_tasks_as_table(tasks)\n",
" response = (\n",
" f\"There are {task_count} tasks found:<br>{table}<br>\"\n",
" \"Would you like to notify the support team? (Yes/No)\"\n",
" )\n",
" history[-1][1] = response\n",
" print(f\"History after tool call: {history}\")\n",
" print(f\"History at -1, 1: {history[-1][1]}\")\n",
" yield history, \"waiting_for_notify_response\", \"\"\n",
" else:\n",
" response = (\n",
" \"Hooray! No ABCD tasks to worry about! 🎈 \"\n",
" \"You're free as a bird—any fun plans for the day?\"\n",
" )\n",
" history[-1][1] = response\n",
" yield history, \"normal\", \"\"\n",
" elif function_name == \"abcd_NotifyTool\":\n",
" abcd_NotifyTool()\n",
" response = \"The support team has been notified successfully! They'll take it from here. 😊\"\n",
" history[-1][1] = response\n",
" yield history, \"normal\", \"\"\n",
" else:\n",
" # No tool calls, use the LLM's response\n",
" bot_response = response_message.content\n",
" history[-1][1] = bot_response\n",
" yield history, \"normal\", \"\"\n",
"\n",
" except Exception as e:\n",
" history[-1][1] = f\"Error: {str(e)}\"\n",
" yield history, \"normal\", \"\""
],
"metadata": {
"id": "m9MN8vFDnwsA"
},
"execution_count": 48,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Custom CSS for table and chat styling\n",
"custom_css = \"\"\"\n",
"table {\n",
" font-family: Arial, sans-serif;\n",
" margin: 10px 0;\n",
"}\n",
"th, td {\n",
" padding: 8px;\n",
" text-align: left;\n",
"}\n",
"th {\n",
" background-color: #f2f2f2;\n",
"}\n",
"tr:nth-child(even) {\n",
" background-color: #f9f9f9;\n",
"}\n",
"\"\"\""
],
"metadata": {
"id": "X3Egf6jAn3uP"
},
"execution_count": 31,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Create Gradio interface\n",
"with gr.Blocks(css=custom_css) as demo:\n",
" chatbot = gr.Chatbot()\n",
" msg = gr.Textbox(placeholder=\"Type your message here...\")\n",
" state = gr.State(value=\"normal\")\n",
"\n",
" msg.submit(\n",
" chat_function,\n",
" inputs=[msg, chatbot],\n",
" outputs=[chatbot, state, msg]\n",
" )"
],
"metadata": {
"id": "g9ctQlBOoDvi"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"demo.launch(debug=True, share=True)"
],
"metadata": {
"id": "DG5gjlXloHhj"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "GiWUiQNJxPSq"
},
"execution_count": null,
"outputs": []
}
]
}

View File

@@ -0,0 +1,13 @@
This code implements a Gradio chat application that uses OpenAI models for the chat functionality, with special handling for ABCD tasks.
# Main Features
1. **General Chat**: Uses OpenAI's GPT models to handle normal conversation.
2. **Task Checking**: When the user mentions "check ABCD tasks" (or a similar phrase), the app calls the abcd_taskTool() function.
3. **Account Number Masking**: Masks the first four digits of the account number with "****" (see the sketch below).
4. **Task Display**: Renders tasks in an HTML table.
5. **Support Notification**: Offers to notify the support team and calls abcd_NotifyTool() if the user confirms.
6. **Cheerful Responses**: Provides random encouraging messages when no tasks are found.
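Account masking, for instance, behaves like this (a minimal sketch mirroring the notebook's mask_accNo helper):
```python
def mask_accNo(accNo):
    # Replace the first four digits with asterisks; values shorter than
    # four characters are returned unchanged
    return "****" + accNo[4:] if len(accNo) >= 4 else accNo

print(mask_accNo("1234567890"))  # ****567890
print(mask_accNo("123"))         # 123 (too short to mask)
```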
## Screenshot
![Chat1](https://github.com/sngo/llms-practice/blob/main/taskmanagement/chat1.png)
![Chat2](https://github.com/sngo/llms-practice/blob/main/taskmanagement/chat2.png)

View File

@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "eff19e8b-000a-4327-b8fb-8fd8a3caaef5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import anthropic\n",
"from IPython.display import Markdown, display, update_display\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8278c99b-d748-42e5-a991-690a791ed081",
"metadata": {},
"outputs": [],
"source": [
"# Let's make a conversation between GPT-4o-mini and Claude-3-haiku\n",
"# We're using cheap versions of models so the costs will be minimal\n",
"\n",
"llama_model = \"llama3.2\"\n",
"deepseek_model = \"deepseek-r1\"\n",
"\n",
"llama_system = \"You are a chatbot who is very argumentative; \\\n",
"you disagree with anything in the conversation and you challenge everything, in a snarky way.\"\n",
"\n",
"deepseek_system = \"You are a very polite, courteous chatbot. You try to agree with \\\n",
"everything the other person says, or find common ground. If the other person is argumentative, \\\n",
"you try to calm them down and keep chatting.\"\n",
"\n",
"llama_messages = [\"Hi there\"]\n",
"deepseek_messages = [\"Hi\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49523e56-0de8-4014-85d5-8aab438d2075",
"metadata": {},
"outputs": [],
"source": [
"def call_llama():\n",
" messages = [{\"role\": \"system\", \"content\": llama_system}]\n",
" for llama, deepseek in zip(llama_messages, deepseek_messages):\n",
" messages.append({\"role\": \"assistant\", \"content\": llama})\n",
" messages.append({\"role\": \"user\", \"content\": deepseek})\n",
" completion = ollama.chat(\n",
" model=llama_model,\n",
" messages=messages\n",
" )\n",
" return completion['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "785240ce-e704-44ff-90cb-e5c0476454a4",
"metadata": {},
"outputs": [],
"source": [
"call_llama()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdba39e3-5543-4657-bc3a-259f586ba392",
"metadata": {},
"outputs": [],
"source": [
"def call_deepseek():\n",
" messages = []\n",
" for llama, deepseek in zip(llama_messages, deepseek_messages):\n",
" messages.append({\"role\": \"user\", \"content\": llama})\n",
" messages.append({\"role\": \"assistant\", \"content\": deepseek})\n",
" messages.append({\"role\": \"user\", \"content\": llama_messages[-1]})\n",
" message = ollama.chat(\n",
" model=deepseek_model,\n",
" options={\n",
" \"system\":deepseek_system,\n",
" \"max_tokens\":500\n",
" },\n",
" messages=messages\n",
" \n",
" )\n",
" \n",
" return message['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "641df7ac-625c-41fa-b780-3130eef93a85",
"metadata": {},
"outputs": [],
"source": [
"call_deepseek()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b33b98e-8d17-45e8-b2a9-a070dc0a6780",
"metadata": {},
"outputs": [],
"source": [
"call_llama()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47912582-51fe-401c-b4ad-12483068adea",
"metadata": {},
"outputs": [],
"source": [
"llama_messages = [\"Hi there\"]\n",
"deepseek_messages = [\"Hi\"]\n",
"\n",
"print(f\"Llama:\\n{llama_messages[0]}\\n\")\n",
"print(f\"Deepseek:\\n{deepseek_messages[0]}\\n\")\n",
"\n",
"for i in range(5):\n",
" llama_next = call_llama()\n",
" print(f\"Llama:\\n{llama_next}\\n\")\n",
" llama_messages.append(llama_next)\n",
" \n",
" deepseek_next = call_deepseek()\n",
" print(f\"Deepseek:\\n{deepseek_next}\\n\")\n",
" deepseek_messages.append(deepseek_next)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3c41b0c-4358-4d84-a479-6409fa331119",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,148 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d768c9b1-c5a7-417a-9fac-5fcbd6944fe6",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import anthropic\n",
"from IPython.display import Markdown, display, update_display\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80a4e740-75d0-4272-b02e-0b77b0a143ae",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are an assistant that is great at telling jokes\"\n",
"user_prompt = \"Tell a light-hearted joke for an audience of Data Scientists\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2ef28e5-5073-4065-b066-387181df063a",
"metadata": {},
"outputs": [],
"source": [
"prompts = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d54e910a-cdbf-49cb-9924-265d9845d622",
"metadata": {},
"outputs": [],
"source": [
"#direct display wihtout streaming\n",
"response = ollama.chat(\n",
" model=\"llama3.2\",\n",
" messages=prompts,\n",
" options={\n",
" \"temperature\": 0.7\n",
" }\n",
" \n",
" )\n",
"result = response['message']['content']\n",
"display(Markdown(result))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47dd7965-fdfc-4472-b2f6-c98f755964f1",
"metadata": {},
"outputs": [],
"source": [
"#This is with streaming \n",
"stream = ollama.chat(\n",
" model=\"llama3.2\",\n",
" messages=prompts,\n",
" stream=True\n",
" )\n",
"response = \"\"\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"for chunk in stream:\n",
" content = chunk.get('message', {}).get('content', '')\n",
" if content:\n",
" response += content.replace(\"```\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef13e3ae-bde7-4e3a-9fcd-0a9bfd1caef0",
"metadata": {},
"outputs": [],
"source": [
"#direct display wihtout streaming, using deepseek-r1\n",
"response = ollama.chat(\n",
" model=\"deepseek-r1\",\n",
" messages=prompts\n",
" \n",
" )\n",
"result = response['message']['content']\n",
"display(Markdown(result))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ddc4fe91-3d5b-4d45-83bf-f349597c672c",
"metadata": {},
"outputs": [],
"source": [
"#direct display wihtout streaming, using qwen3\n",
"response = ollama.chat(\n",
" model=\"qwen3\",\n",
" messages=prompts\n",
" \n",
" )\n",
"result = response['message']['content']\n",
"display(Markdown(result))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2beb6731-41e3-42a4-a8d3-5f0ef644f2f3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,347 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "05617f71-449f-42c5-905c-f080d61520ec",
"metadata": {},
"outputs": [],
"source": [
"import gradio as gr\n",
"def greet(name):\n",
" return \"Hello \" + name + \"!\"\n",
"def shout(name):\n",
" return name.upper()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c57765d7-5d69-4332-be71-2800296ca8ed",
"metadata": {},
"outputs": [],
"source": [
"#demo = gr.Interface(fn=shout, inputs=gr.Textbox(), outputs=gr.Textbox()) //this works too\n",
"demo = gr.Interface(fn=greet, inputs=\"textbox\", outputs=\"textbox\",allow_flagging=\"never\")\n",
"demo.launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abbc237a-8da2-4993-b350-8f8a7d807242",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f021005-2a39-42ec-b671-b24babd0ef1a",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are a helpful assistant\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1677645-4166-4d77-8567-cae77120f1c3",
"metadata": {},
"outputs": [],
"source": [
"def message_llama(prompt):\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" completion = ollama.chat(\n",
" model='llama3.2',\n",
" messages=messages,\n",
" )\n",
" return completion['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33295d15-f4d2-4588-9400-3c1e3c6492f2",
"metadata": {},
"outputs": [],
"source": [
"message_llama(\"what is the date today\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e2594e-6a70-4832-b601-60a6a0d4d671",
"metadata": {},
"outputs": [],
"source": [
"def stream_llama(prompt):\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" stream = ollama.chat(\n",
" model='llama3.2',\n",
" messages=messages,\n",
" stream=True\n",
" )\n",
" result = \"\"\n",
" for chunk in stream:\n",
" result += chunk['message']['content'] or \"\"\n",
" yield result\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0ebf588-3d69-4012-9719-23d11fbbf4f5",
"metadata": {},
"outputs": [],
"source": [
"def stream_deepseek(prompt):\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" stream = ollama.chat(\n",
" model='deepseek-r1',\n",
" messages=messages,\n",
" stream=True\n",
" )\n",
" result = \"\"\n",
" for chunk in stream:\n",
" result += chunk['message']['content'] or \"\"\n",
" yield result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7db5aa24-b608-489a-ba26-1a4b627658e2",
"metadata": {},
"outputs": [],
"source": [
"def stream_qwen3(prompt):\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" stream = ollama.chat(\n",
" model='qwen3',\n",
" messages=messages,\n",
" stream=True\n",
" )\n",
" result = \"\"\n",
" for chunk in stream:\n",
" result += chunk['message']['content'] or \"\"\n",
" yield result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d37b5df8-b281-4096-bdc7-5c6a1872cea7",
"metadata": {},
"outputs": [],
"source": [
"def stream_model(prompt, model):\n",
" if model==\"llama3.2\":\n",
" result = stream_llama(prompt)\n",
" elif model==\"deepseek-r1\":\n",
" result = stream_deepseek(prompt)\n",
" else:\n",
" raise ValueError(\"Unknown model\")\n",
" yield from result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb408edc-6a83-4725-9fb9-1b95ff0c9ed0",
"metadata": {},
"outputs": [],
"source": [
"gr.Interface(fn=stream_model, inputs=[gr.Textbox(label=\"Your Message\"),gr.Dropdown([\"llama3.2\", \"deepseek-r1\"], label=\"Select model\", value=\"llama3.2\")], outputs=[gr.Markdown(label=\"Response\")],flagging_mode=\"never\").launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc7c3aa0-693a-43a0-8f5b-b07c66bb6733",
"metadata": {},
"outputs": [],
"source": [
"gr.Interface(fn=stream_llama, inputs=[gr.Textbox(label=\"Your Message\")], outputs=[gr.Markdown(label=\"Response\")],flagging_mode=\"never\").launch()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "e45e9b56-5c2f-4b17-bbf4-5691ce35ff15",
"metadata": {},
"outputs": [],
"source": [
"class Website:\n",
" url: str\n",
" title: str\n",
" text: str\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "f9fcf30e-09c7-4f90-8bf9-8cc588ede95c",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are an assistant that analyzes the contents of a company website landing page \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\"\n",
"# For Fun\n",
"tone_description_fun = \"\"\"\n",
" The tone should be:\n",
" - **Fun and Playful:** Inject humor, use lighthearted language, and maintain an upbeat vibe.\n",
" - **Energetic:** Use active voice, strong verbs, and occasional exclamation points.\n",
" - **Approachable:** Write as if speaking to a friend, using slightly informal language and contractions.\n",
" - **Creative:** Think outside the box for descriptions and calls to action.\n",
" - Avoid sounding childish or overly silly.\n",
"\"\"\"\n",
"\n",
"# For Aggression\n",
"tone_description_aggression = \"\"\"\n",
" The tone should be:\n",
" - **Bold and Assertive:** Use strong, direct language that conveys confidence and power.\n",
" - **Challenging:** Pose questions that make the reader reconsider their current solutions.\n",
" - **Urgent:** Imply a need for immediate action and emphasize competitive advantages.\n",
" - **Direct and Punchy:** Employ short, impactful sentences and strong calls to action.\n",
" - **Dominant:** Position the company as a leader and a force to be reckoned with.\n",
" - Avoid being rude, offensive, or overly hostile. Focus on competitive intensity.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "83dd8aec-f74f-452b-90cc-3ad5bc903037",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure(company_name, url, model, tone):\n",
" prompt = f\"Please generate a company brochure for {company_name} that embodies the following tone and style guidelines: {tone}. Here is their landing page:\\n\"\n",
" prompt += Website(url).get_contents()\n",
" if model==\"llama\":\n",
" result = stream_llama(prompt)\n",
" elif model==\"deepseek\":\n",
" result = stream_deepseek(prompt)\n",
" else:\n",
" raise ValueError(\"Unknown model\")\n",
" yield from result"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "ef1a246f-a3f7-457e-a85c-2076b407f52a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7890\n",
"* To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7890/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"view = gr.Interface(\n",
" fn=stream_brochure,\n",
" inputs=[\n",
" gr.Textbox(label=\"Company name:\"),\n",
" gr.Textbox(label=\"Landing page URL including http:// or https://\"),\n",
" gr.Dropdown([\"llama\", \"deepseek\"], label=\"Select model\"),\n",
" gr.Dropdown([\"tone_description_fun\", \"tone_description_aggression\"])],\n",
" outputs=[gr.Markdown(label=\"Brochure:\")],\n",
" \n",
" flagging_mode=\"never\"\n",
")\n",
"view.launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0659a1dc-a00b-4cbf-b5ed-d6661fbb57f2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,799 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "830865a5",
"metadata": {},
"source": [
"# Code Generator - Windows\n",
"\n",
"Re-codes day4 excercise to build on windows\n",
"\n",
"### Section 1: Manually Generate and Execute CPP Code"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8559090",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import io\n",
"import sys\n",
"import json\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import google.generativeai\n",
"import anthropic\n",
"from IPython.display import Markdown, display, update_display\n",
"import gradio as gr\n",
"import subprocess\n",
"from huggingface_hub import login, InferenceClient\n",
"from transformers import AutoTokenizer\n",
"import platform"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "858b5e7b",
"metadata": {},
"outputs": [],
"source": [
"# environment\n",
"\n",
"load_dotenv(override=True)\n",
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98c45ca8",
"metadata": {},
"outputs": [],
"source": [
"# initialize\n",
"\n",
"openai = OpenAI()\n",
"claude = anthropic.Anthropic()\n",
"hf_token = os.environ['HF_TOKEN']\n",
"login(hf_token, add_to_git_credential=True)\n",
"\n",
"# models\n",
"OPENAI_MODEL = \"gpt-4o\"\n",
"CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n",
"CODE_QWEN_MODEL = \"Qwen/CodeQwen1.5-7B-Chat\"\n",
"CODE_GEMMA_MODEL = \"Gemini/CodeGemma-7B-IT\"\n",
"\n",
"# huggingface inference clients\n",
"# CODE_QWEN_URL = \"https://h1vdol7jxhje3mpn.us-east-1.aws.endpoints.huggingface.cloud\"\n",
"CODE_QWEN_URL = \"https://mb4mgfmpql2yrady.us-east-1.aws.endpoints.huggingface.cloud\"\n",
"CODE_GEMMA_URL = \"https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud\"\n",
"\n",
"# path to your visual studio build tools VsDevCmd.bat file - initialize environment\n",
"VISUAL_STUDIO_BUILD_TOOLS_PATH = \"C:\\\\Program Files (x86)\\\\Microsoft Visual Studio\\\\2022\\\\BuildTools\\\\Common7\\\\Tools\\\\VsDevCmd.bat\"\n",
"\n",
"# prefix of the output files, e.g., \"optimized_d4win.cpp\", \"optimized_d4win.exe\", etc.\n",
"OUTPUT_FILE_NAME_BASE = \"optimized_d4win\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac5efb13",
"metadata": {},
"outputs": [],
"source": [
"# System message\n",
"\n",
"system_message = \"You are an assistant that reimplements Python code in high performance C++ for an M1 Mac. \"\n",
"system_message += \"Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n",
"system_message += \"The C++ response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce3ce9da",
"metadata": {},
"outputs": [],
"source": [
"# build user prompts and message structures for openai and claude models\n",
"\n",
"def user_prompt_for(python):\n",
" user_prompt = \"Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. \"\n",
" user_prompt += \"Respond only with C++ code; do not explain your work other than a few comments. \"\n",
" user_prompt += \"Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ packages such as iomanip.\\n\\n\"\n",
" user_prompt += python\n",
" return user_prompt\n",
"\n",
"def messages_for(python):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(python)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ea427ef",
"metadata": {},
"outputs": [],
"source": [
"# write to a file called <OUTPUT_FILE_NAME>.cpp\n",
"\n",
"def write_output(cpp):\n",
" code = cpp.replace(\"```cpp\",\"\").replace(\"```\",\"\")\n",
" with open(f\"{OUTPUT_FILE_NAME_BASE}.cpp\", \"w\") as f:\n",
" f.write(code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d1198b4",
"metadata": {},
"outputs": [],
"source": [
"# reade the contents of the output file called <OUTPUT_FILE_NAME>.cpp\n",
"def read_output():\n",
" with open(f\"{OUTPUT_FILE_NAME_BASE}.cpp\", \"r\") as f:\n",
" return f.read()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d3d9650",
"metadata": {},
"outputs": [],
"source": [
"# optimize code using openai and claude\n",
"\n",
"def optimize_gpt(python): \n",
" stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
" reply = \"\"\n",
" for chunk in stream:\n",
" fragment = chunk.choices[0].delta.content or \"\"\n",
" reply += fragment\n",
" print(fragment, end='', flush=True)\n",
" write_output(reply)\n",
" \n",
"def optimize_claude(python):\n",
" result = claude.messages.stream(\n",
" model=CLAUDE_MODEL,\n",
" max_tokens=2000,\n",
" system=system_message,\n",
" messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
" )\n",
" reply = \"\"\n",
" with result as stream:\n",
" for text in stream.text_stream:\n",
" reply += text\n",
" print(text, end=\"\", flush=True)\n",
" write_output(reply)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "523d1481",
"metadata": {},
"outputs": [],
"source": [
"# generate build c++ comiple and run commands based on your platform\n",
"# Borrowed from @CloudLlama's contribution!\n",
"\n",
"def run_cmd(command_to_run):\n",
" try:\n",
" run_result = subprocess.run(command_to_run, check=True, text=True, capture_output=True)\n",
" return run_result.stdout if run_result.stdout else \"SUCCESS\"\n",
" except:\n",
" return \"\"\n",
" \n",
"def c_compiler_cmd(filename_base):\n",
" my_platform = platform.system()\n",
" my_compiler = []\n",
" \n",
" try: \n",
" if my_platform == \"Windows\":\n",
" if os.path.isfile(VISUAL_STUDIO_BUILD_TOOLS_PATH):\n",
" compile_cmd = [\"cmd\", \"/c\", VISUAL_STUDIO_BUILD_TOOLS_PATH, \"&\", \"cl\", f\"{filename_base}.cpp\"]\n",
" my_compiler = [\"Windows\", \"Visual Studio Build Tools\", compile_cmd]\n",
" \n",
" if not my_compiler:\n",
" my_compiler=[my_platform, \"Unavailable\", []]\n",
" \n",
" elif my_platform == \"Linux\":\n",
" compile_cmd = [\"g++\", f\"{filename_base}.cpp\", \"-o\", filename_base]\n",
" my_compiler = [\"Linux\", \"GCC (g++)\", compile_cmd]\n",
" \n",
" if not my_compiler:\n",
" compile_cmd = [\"clang++\", f\"{filename_base}.cpp\", \"-o\", filename_base]\n",
" my_compiler = [\"Linux\", \"Clang++\", compile_cmd]\n",
" \n",
" if not my_compiler:\n",
" my_compiler=[my_platform, \"Unavailable\", []]\n",
" \n",
" elif my_platform == \"Darwin\":\n",
" compile_cmd = [\"clang++\", \"-Ofast\", \"-std=c++17\", \"-march=armv8.5-a\", \"-mtune=apple-m1\", \"-mcpu=apple-m1\", \"-o\", filename_base, f\"{filename_base}.cpp\"]\n",
" my_compiler = [\"Macintosh\", \"Clang++\", compile_cmd]\n",
" \n",
" if not my_compiler:\n",
" my_compiler=[my_platform, \"Unavailable\", []]\n",
" except:\n",
" my_compiler=[my_platform, \"Unavailable\", []]\n",
" \n",
" if my_compiler:\n",
" return my_compiler\n",
" else:\n",
" return [\"Unknown\", \"Unavailable\", []] "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43d84759",
"metadata": {},
"outputs": [],
"source": [
"# compile and execute the C++ code\n",
"\n",
"def execute_cpp(code):\n",
" filename_base = OUTPUT_FILE_NAME_BASE\n",
" compiler_cmd = c_compiler_cmd(filename_base)\n",
" write_output(code)\n",
" \n",
" try:\n",
" if os.path.isfile(f\"./{filename_base}.exe\"):\n",
" os.remove(f\"./{filename_base}.exe\")\n",
" if os.path.isfile(f\"./{filename_base}\"):\n",
" os.remove(f\"./{filename_base}\")\n",
" compile_result = subprocess.run(compiler_cmd[2], check=True, text=True, capture_output=True)\n",
" run_cmd = [f\"./{filename_base}\"]\n",
" run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n",
" return run_result.stdout\n",
" except subprocess.CalledProcessError as e:\n",
" return f\"An error occurred:\\n{e.stderr}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a658a7ba",
"metadata": {},
"outputs": [],
"source": [
"# simple pypton code to optimize\n",
"\n",
"pi = \"\"\"\n",
"import time\n",
"\n",
"def calculate(iterations, param1, param2):\n",
" result = 1.0\n",
" for i in range(1, iterations+1):\n",
" j = i * param1 - param2\n",
" result -= (1/j)\n",
" j = i * param1 + param2\n",
" result += (1/j)\n",
" return result\n",
"\n",
"start_time = time.time()\n",
"result = calculate(100_000_000, 4, 1) * 4\n",
"end_time = time.time()\n",
"\n",
"print(f\"Result: {result:.12f}\")\n",
"print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "876bfe68",
"metadata": {},
"outputs": [],
"source": [
"# execute the simple python code\n",
"\n",
"exec(pi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98c553c8",
"metadata": {},
"outputs": [],
"source": [
"# optimize the simple python code with openai\n",
"\n",
"optimize_gpt(pi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db5a1fc3",
"metadata": {},
"outputs": [],
"source": [
"# compile and run the openai optimized code\n",
"\n",
"cpp_code = read_output()\n",
"execute_cpp(cpp_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3cd5c138",
"metadata": {},
"outputs": [],
"source": [
"# optimize the simple python code with claude\n",
"\n",
"optimize_claude(pi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "110a7c11",
"metadata": {},
"outputs": [],
"source": [
"# compile and run the claude optimized code\n",
"\n",
"cpp_code = read_output()\n",
"execute_cpp(cpp_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dceaa0db",
"metadata": {},
"outputs": [],
"source": [
"# hard pypton code to optimize\n",
"\n",
"pi_hard = \"\"\"# Be careful to support large number sizes\n",
"\n",
"def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
" value = seed\n",
" while True:\n",
" value = (a * value + c) % m\n",
" yield value\n",
" \n",
"def max_subarray_sum(n, seed, min_val, max_val):\n",
" lcg_gen = lcg(seed)\n",
" random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
" max_sum = float('-inf')\n",
" for i in range(n):\n",
" current_sum = 0\n",
" for j in range(i, n):\n",
" current_sum += random_numbers[j]\n",
" if current_sum > max_sum:\n",
" max_sum = current_sum\n",
" return max_sum\n",
"\n",
"def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
" total_sum = 0\n",
" lcg_gen = lcg(initial_seed)\n",
" for _ in range(20):\n",
" seed = next(lcg_gen)\n",
" total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
" return total_sum\n",
"\n",
"# Parameters\n",
"n = 10000 # Number of random numbers\n",
"initial_seed = 42 # Initial seed for the LCG\n",
"min_val = -10 # Minimum value of random numbers\n",
"max_val = 10 # Maximum value of random numbers\n",
"\n",
"# Timing the function\n",
"import time\n",
"start_time = time.time()\n",
"result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
"end_time = time.time()\n",
"\n",
"print(\"Total Maximum Subarray Sum (20 runs):\", result)\n",
"print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "371edd71",
"metadata": {},
"outputs": [],
"source": [
"# execute the simple python code\n",
"\n",
"exec(pi_hard)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b0a7233",
"metadata": {},
"outputs": [],
"source": [
"# optimize the hard python code with openai\n",
"\n",
"optimize_gpt(pi_hard)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "281e959c",
"metadata": {},
"outputs": [],
"source": [
"# compile and run the openai optimized code\n",
"\n",
"cpp_code = read_output()\n",
"execute_cpp(cpp_code)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d2712ce",
"metadata": {},
"outputs": [],
"source": [
"# optimize the hard python code with claude\n",
"\n",
"optimize_claude(pi_hard)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "444549a7",
"metadata": {},
"outputs": [],
"source": [
"# compile and run the openai optimized code\n",
"\n",
"cpp_code = read_output()\n",
"execute_cpp(cpp_code)"
]
},
{
"cell_type": "markdown",
"id": "6aec3927",
"metadata": {},
"source": [
"### Section 2: Using Gradio Interface to Generate and Execute CPP Code"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6607743c",
"metadata": {},
"outputs": [],
"source": [
"# configure streaming responses from openai and claude\n",
"\n",
"def stream_gpt(python): \n",
" stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
" reply = \"\"\n",
" for chunk in stream:\n",
" fragment = chunk.choices[0].delta.content or \"\"\n",
" reply += fragment\n",
" yield reply.replace('```cpp\\n','').replace('```','')\n",
" \n",
"def stream_claude(python):\n",
" result = claude.messages.stream(\n",
" model=CLAUDE_MODEL,\n",
" max_tokens=2000,\n",
" system=system_message,\n",
" messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
" )\n",
" reply = \"\"\n",
" with result as stream:\n",
" for text in stream.text_stream:\n",
" reply += text\n",
" yield reply.replace('```cpp\\n','').replace('```','')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "052a713e",
"metadata": {},
"outputs": [],
"source": [
"# optimize code based on selected model\n",
"\n",
"def optimize(python, model):\n",
" if model==\"GPT\":\n",
" result = stream_gpt(python)\n",
" elif model==\"Claude\":\n",
" result = stream_claude(python)\n",
" else:\n",
" raise ValueError(\"Unknown model\")\n",
" for stream_so_far in result:\n",
" yield stream_so_far "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27479cab",
"metadata": {},
"outputs": [],
"source": [
"# use gradio to optimize python code from the UI\n",
"\n",
"with gr.Blocks() as ui:\n",
" with gr.Row():\n",
" python = gr.Textbox(label=\"Python code:\", lines=10, value=pi)\n",
" cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
" with gr.Row():\n",
" model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
" convert = gr.Button(\"Convert code\")\n",
"\n",
" convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
"\n",
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b39125a",
"metadata": {},
"outputs": [],
"source": [
"# execute python code\n",
"\n",
"def execute_python(code):\n",
" try:\n",
" output = io.StringIO()\n",
" sys.stdout = output\n",
" exec(code)\n",
" finally:\n",
" sys.stdout = sys.__stdout__\n",
" return output.getvalue()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5cb0f55",
"metadata": {},
"outputs": [],
"source": [
"# css to color by code type\n",
"\n",
"css = \"\"\"\n",
".python {background-color: #306998;}\n",
".cpp {background-color: #050;}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66e5ef37",
"metadata": {},
"outputs": [],
"source": [
"# add ability to run python code, compile and run c++ code in gradio\n",
"\n",
"with gr.Blocks(css=css) as ui:\n",
" gr.Markdown(\"## Convert code from Python to C++\")\n",
" with gr.Row():\n",
" python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n",
" cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
" with gr.Row():\n",
" model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
" with gr.Row():\n",
" convert = gr.Button(\"Convert code\")\n",
" with gr.Row():\n",
" python_run = gr.Button(\"Run Python\")\n",
" cpp_run = gr.Button(\"Run C++\")\n",
" with gr.Row():\n",
" python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
" cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
"\n",
" convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
" python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
" cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
"\n",
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "markdown",
"id": "b4d29c1c",
"metadata": {},
"source": [
"### Section 3: Add HuggingFace-hosted Open Source Models to Gradio App"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ec5e4cb",
"metadata": {},
"outputs": [],
"source": [
"# initilize code qwen huggingface endpoint\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n",
"messages = messages_for(pi)\n",
"text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c3ac865",
"metadata": {},
"outputs": [],
"source": [
"# inspect the message template (text)\n",
"\n",
"print(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a1de956",
"metadata": {},
"outputs": [],
"source": [
"# manually call the huggingface endpoint, generate c++ with code qwen\n",
"\n",
"client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n",
"stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
"for r in stream:\n",
" print(r.token.text, end = \"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20d310dc",
"metadata": {},
"outputs": [],
"source": [
"# configure streaming responses from code qwen\n",
"\n",
"def stream_code_qwen(python):\n",
" tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n",
" messages = messages_for(python)\n",
" text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
" client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n",
" stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
" result = \"\"\n",
" for r in stream:\n",
" result += r.token.text\n",
" yield result "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1340cd15",
"metadata": {},
"outputs": [],
"source": [
"# update optimize function to include code qwen\n",
"\n",
"def optimize(python, model):\n",
" if model==\"GPT\":\n",
" result = stream_gpt(python)\n",
" elif model==\"Claude\":\n",
" result = stream_claude(python)\n",
" elif model==\"CodeQwen\":\n",
" result = stream_code_qwen(python)\n",
" else:\n",
" raise ValueError(\"Unknown model\")\n",
" for stream_so_far in result:\n",
" yield stream_so_far "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6761f4ea",
"metadata": {},
"outputs": [],
"source": [
"# select the python code sample and return the code\n",
"\n",
"def select_sample_program(sample_program):\n",
" if sample_program==\"pi\":\n",
" return pi\n",
" elif sample_program==\"pi_hard\":\n",
" return pi_hard\n",
" else:\n",
" return \"Type your Python program here\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74ecb73b",
"metadata": {},
"outputs": [],
"source": [
"# run the updated gradio UI with code qwen\n",
"\n",
"compiler_cmd = c_compiler_cmd(\"optimized\")\n",
"\n",
"with gr.Blocks(css=css) as ui:\n",
" gr.Markdown(\"## Convert code from Python to C++\")\n",
" with gr.Row():\n",
" python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n",
" cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
" with gr.Row():\n",
" with gr.Column():\n",
" sample_program = gr.Radio([\"pi\", \"pi_hard\"], label=\"Sample program\", value=\"pi\")\n",
" model = gr.Dropdown([\"GPT\", \"Claude\", \"CodeQwen\"], label=\"Select model\", value=\"GPT\")\n",
" with gr.Column():\n",
" architecture = gr.Radio([compiler_cmd[0]], label=\"Architecture\", interactive=False, value=compiler_cmd[0])\n",
" compiler = gr.Radio([compiler_cmd[1]], label=\"Compiler\", interactive=False, value=compiler_cmd[1])\n",
" with gr.Row():\n",
" convert = gr.Button(\"Convert code\")\n",
" with gr.Row():\n",
" python_run = gr.Button(\"Run Python\")\n",
" if not compiler_cmd[1] == \"Unavailable\":\n",
" cpp_run = gr.Button(\"Run C++\")\n",
" else:\n",
" cpp_run = gr.Button(\"No compiler to run C++\", interactive=False)\n",
" with gr.Row():\n",
" python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
" cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
"\n",
" sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])\n",
" convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
" python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
" cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
"\n",
"ui.launch(inbrowser=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llms",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,67 @@
# Tourist Assistant
An interactive voice-enabled tourist guide that provides information about cities, landmarks, and destinations worldwide. This application uses OpenAI's GPT models for text generation and speech features for a natural conversation experience, along with RAG capabilities and Google Places API integration for real-time attraction information.
![Tourist Assistant Screenshot](travel.jpg)
## Features
- Text-based chat interface for asking questions about tourist destinations
- Voice input capability through microphone recording
- Audio responses using OpenAI's text-to-speech technology
- Clean, responsive user interface with Gradio
- RAG (Retrieval-Augmented Generation) system using PDF knowledge base
- Google Places API integration for real-time information about attractions
- Set current location for contextual queries
- Quick access to nearby attractions information
## Requirements
- Python 3.9+
- OpenAI API key
- Google Places API key (optional, for location search features)
## Installation
1. Clone this repository
2. Install the required dependencies:
```
pip install -r requirements.txt
```
3. Create a `.env` file in the project directory with your API keys:
```
OPENAI_API_KEY=your_openai_api_key_here
GOOGLE_PLACES_API_KEY=your_google_places_api_key_here
```
4. (Optional) Add PDF files to the `knowledge-base/` directory to enhance the assistant's knowledge about specific locations
## Running the Application
Start the application by running:
```bash
python tourist-assistant.py
```
The interface will automatically open in your default web browser. If it doesn't, navigate to the URL shown in the terminal (typically http://127.0.0.1:7860/).
## Usage
1. Type your question about any tourist destination in the text box
2. Or click the microphone button and speak your question
3. The assistant will respond with text and spoken audio
4. Set your current location using the "Set Location" feature
5. Click "Nearby Attractions" to get information about attractions near your current location
6. Use the "Refresh Knowledge Base" button to reload PDFs in the knowledge-base directory
7. Use the "Clear" button to start a new conversation
## Technologies Used
- OpenAI GPT-4o Mini for chat completions
- OpenAI Whisper for speech-to-text
- OpenAI TTS for text-to-speech
- Langchain for RAG implementation
- FAISS for vector storage
- Google Places API for location-based attraction information
- Gradio for the web interface
- pydub for audio processing
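## How the RAG Pipeline Fits Together
For reference, the retrieval wiring in `tourist-assistant.py` boils down to roughly the sketch below. The chunk sizes and the retriever's `k` mirror the script's defaults; `all_pdf_text` is a placeholder for the concatenated text extracted from the PDFs.
```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

all_pdf_text = "..."  # placeholder: text extracted from knowledge-base/*.pdf

# Split the text into overlapping chunks so retrieval stays focused
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(all_pdf_text)

# Embed the chunks and index them in an in-memory FAISS store
vector_store = FAISS.from_texts(chunks, OpenAIEmbeddings())

# Answer each question with the top-3 matching chunks as context
chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model="gpt-4o-mini"),
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
)
reply = chain.invoke({"question": "Tell me about Rome", "chat_history": []})["answer"]
```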

View File

@@ -0,0 +1,11 @@
openai>=1.0.0
gradio>=4.0.0
python-dotenv>=1.0.0
pydub>=0.25.1
pypdf>=4.0.0
langchain>=0.1.0
langchain-openai>=0.0.5
langchain-community>=0.0.13
faiss-cpu>=1.7.4
tiktoken>=0.5.2
requests>=2.31.0

View File

@@ -0,0 +1,92 @@
/* Styling for Tourist Assistant */
.container {
max-width: 850px;
margin: auto;
background-color: rgba(255, 255, 255, 0.95);
padding: 20px;
border-radius: 15px;
box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
}
.title {
text-align: center;
font-size: 2.5rem !important;
margin-bottom: 0.5rem;
color: #2563EB;
font-weight: 600;
}
.subtitle {
text-align: center;
font-size: 1.1rem !important;
margin-bottom: 1.5rem;
color: #4B5563;
}
.footer {
text-align: center;
margin-top: 1rem;
color: #6B7280;
font-size: 0.9rem !important;
}
.mic-container {
text-align: center;
margin: 1rem auto;
}
.clear-button {
max-width: 120px;
margin-left: auto;
}
.chatbot-container {
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
background-color: white;
}
/* Styling for the microphone button */
#mic-btn {
width: 150px !important;
margin: 0 auto !important;
}
#mic-btn .wrap {
display: flex;
justify-content: center;
}
/* Make the mic button more prominent and attractive */
#mic-btn button.record-button {
width: 60px !important;
height: 60px !important;
border-radius: 50% !important;
background-color: #3B82F6 !important;
color: white !important;
font-size: 24px !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
margin: 0 auto !important;
border: none !important;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
transition: all 0.2s ease !important;
margin-bottom: 10px !important;
}
#mic-btn button.record-button:hover {
transform: scale(1.05) !important;
box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15) !important;
}
/* Hide the audio controls */
#mic-btn .audio-controls {
display: none !important;
}
/* Hide the audio playback */
#mic-btn audio {
display: none !important;
}

View File

@@ -0,0 +1,559 @@
import os
import glob
import requests
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI
# Initialization
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
print("OpenAI API Key not set")
# Get Google Places API Key - used for location search
google_api_key = os.getenv('GOOGLE_PLACES_API_KEY')
if google_api_key:
print(f"Google Places API Key exists and begins {google_api_key[:8]}")
else:
print("Google Places API Key not set. Location search will be disabled.")
MODEL = "gpt-4o-mini"
openai = OpenAI()
# Functions for RAG implementation
def read_pdf(file_path):
"""Read a PDF file and extract text content."""
pdf_reader = PdfReader(file_path)
text = ""
for page in pdf_reader.pages:
text += page.extract_text() or ""
return text
def load_knowledge_base():
"""Load all PDFs from the knowledge-base directory and create a vector store."""
# Create the knowledge-base directory if it doesn't exist
os.makedirs("knowledge-base", exist_ok=True)
# Get all PDF files in the knowledge-base directory
pdf_files = glob.glob("knowledge-base/*.pdf")
if not pdf_files:
print("No PDF files found in the knowledge-base directory.")
return None
# Read and concatenate all PDF content
all_content = ""
for pdf_file in pdf_files:
print(f"Processing: {pdf_file}")
content = read_pdf(pdf_file)
all_content += content + "\n\n"
# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len
)
chunks = text_splitter.split_text(all_content)
# Create vector store
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_texts(chunks, embeddings)
print(f"Created vector store with {len(chunks)} chunks from {len(pdf_files)} PDF files")
return vector_store
# Initialize vector store
vector_store = load_knowledge_base()
if vector_store:
# Create retrieval chain
llm = ChatOpenAI(model=MODEL)
retrieval_chain = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
return_source_documents=False
)
print("RAG system initialized successfully")
else:
print("RAG system not initialized. Please add PDF files to the knowledge-base directory.")
retrieval_chain = None
#audio generation
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
def talker(message):
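    # Convert the assistant's reply to speech with OpenAI TTS and play it locally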
response=openai.audio.speech.create(
model="tts-1",
voice="onyx",
input=message
)
audio_stream=BytesIO(response.content)
audio=AudioSegment.from_file(audio_stream, format="mp3")
play(audio)
def search_attractions(location):
"""Search for tourist attractions in a specified location using Google Places API."""
if not google_api_key:
return {"error": "Google Places API Key not set. Location search disabled."}
try:
# First get the place_id for the location
geocode_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={location}&key={google_api_key}"
geocode_response = requests.get(geocode_url)
geocode_data = geocode_response.json()
if geocode_data["status"] != "OK" or len(geocode_data["results"]) == 0:
return {"error": f"Location not found: {location}"}
# Get coordinates
location_data = geocode_data["results"][0]
lat = location_data["geometry"]["location"]["lat"]
lng = location_data["geometry"]["location"]["lng"]
# Search for attractions
places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
params = {
"location": f"{lat},{lng}",
"radius": 5000, # 5km radius
"type": "tourist_attraction",
"key": google_api_key
}
places_response = requests.get(places_url, params=params)
places_data = places_response.json()
# Format the results
attractions = []
if places_data["status"] == "OK" and "results" in places_data:
for place in places_data["results"][:10]: # Limit to top 10 results
attractions.append({
"name": place["name"],
"rating": place.get("rating", "Not rated"),
"vicinity": place.get("vicinity", "No address available"),
"types": place.get("types", [])
})
return {
"location": location_data["formatted_address"],
"coordinates": {"lat": lat, "lng": lng},
"attractions": attractions
}
except Exception as e:
return {"error": f"Error searching for attractions: {str(e)}"}
def get_attraction_details(location, attraction_name):
"""Get more detailed information about a specific attraction."""
if not google_api_key:
return {"error": "Google Places API Key not set. Location search disabled."}
try:
# Search for the specific place
place_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"
params = {
"input": f"{attraction_name} in {location}",
"inputtype": "textquery",
"fields": "place_id,name,formatted_address,rating,user_ratings_total,types,opening_hours,photos",
"key": google_api_key
}
place_response = requests.get(place_url, params=params)
place_data = place_response.json()
if place_data["status"] != "OK" or len(place_data["candidates"]) == 0:
return {"error": f"Attraction not found: {attraction_name} in {location}"}
place_id = place_data["candidates"][0]["place_id"]
# Get detailed place information
details_url = "https://maps.googleapis.com/maps/api/place/details/json"
details_params = {
"place_id": place_id,
"fields": "name,formatted_address,rating,reviews,opening_hours,website,price_level,formatted_phone_number,photos",
"key": google_api_key
}
details_response = requests.get(details_url, params=details_params)
details_data = details_response.json()
if details_data["status"] != "OK":
return {"error": f"Could not get details for: {attraction_name}"}
return details_data["result"]
except Exception as e:
return {"error": f"Error getting attraction details: {str(e)}"}
system_message = "You are a helpful assistant for tourists visiting a city."
system_message += "Help the user and give him or her good explanation about the cities or places."
system_message += "Talk about history, geography and current conditions."
system_message += "Start with a short explanation about three lines and when the user wants explain more."
system_message += "Use the retrieved information from knowledge base when available to give detailed and accurate information."
system_message += "When the user asks about attractions in a specific location, use the provided attractions data to give recommendations."
#gradio handles the history of user messages and the assistant responses
def extract_location(message):
"""Extract location information from a message using OpenAI."""
try:
prompt = [
{"role": "system", "content": "Extract the location mentioned in the user's query. If no location is explicitly mentioned, return 'None'. Return only the location name without any explanation."},
{"role": "user", "content": message}
]
response = openai.chat.completions.create(
model="gpt-3.5-turbo", # Using a smaller model for simple location extraction
messages=prompt,
temperature=0.1,
max_tokens=50
)
location = response.choices[0].message.content.strip()
return None if location.lower() in ['none', 'no location mentioned', 'no location', 'not specified'] else location
except Exception as e:
print(f"Error extracting location: {str(e)}")
return None
def chat(history):
# Extract just the content from the message history for RAG
chat_history = []
messages = [{"role": "system", "content": system_message}]
    for i in range(0, len(history), 2):
        if i+1 < len(history):
            user_msg = history[i]["content"]
            ai_msg = history[i+1]["content"]
            chat_history.append((user_msg, ai_msg))
            messages.append({"role": "user", "content": user_msg})
            if ai_msg:
                messages.append({"role": "assistant", "content": ai_msg})
    # Get the latest user message (an odd-length history ends with it) and
    # make sure it also reaches the model in the non-RAG fallback path
    latest_user_message = history[-1]["content"] if history and history[-1]["role"] == "user" else ""
    if latest_user_message:
        messages.append({"role": "user", "content": latest_user_message})
# First check if we have a preset current_location
location = None
if current_location and "attractions" in latest_user_message.lower():
# User is asking about attractions and we have a set location
location = current_location
print(f"Using preset location: {location}")
else:
# Try to extract location from the message
extracted_location = extract_location(latest_user_message)
if extracted_location:
location = extracted_location
print(f"Extracted location from message: {location}")
# If we have a location and the API key, search for attractions
if location and google_api_key:
# This is likely a location-based query about attractions
print(f"Searching for attractions in: {location}")
# Get attraction data
attractions_data = search_attractions(location)
# If there's an error or no attractions found
if "error" in attractions_data or (
"attractions" in attractions_data and len(attractions_data["attractions"]) == 0
):
error_msg = attractions_data.get("error", f"No attractions found in {location}")
print(f"Location search error: {error_msg}")
# Continue with regular processing but include the error info
updated_msg = f"I tried to find attractions in {location}, but {error_msg.lower()}. Let me provide general information instead.\n\n{latest_user_message}"
messages.append({"role": "system", "content": updated_msg})
else:
# Add the attraction information to the context
attraction_context = f"Information about {location}: {attractions_data['location']}\n\nTop attractions:"
for i, attraction in enumerate(attractions_data["attractions"], 1):
attraction_context += f"\n{i}. {attraction['name']} - Rating: {attraction['rating']} - {attraction['vicinity']}"
# Suggest specific attraction details if the user mentioned one
if "attractions" in attractions_data and attractions_data["attractions"]:
for attraction in attractions_data["attractions"]:
attraction_name = attraction["name"].lower()
if attraction_name in latest_user_message.lower():
print(f"Getting details for specific attraction: {attraction['name']}")
attraction_details = get_attraction_details(location, attraction["name"])
if "error" not in attraction_details:
details_str = f"\n\nDetails for {attraction['name']}:\n"
details_str += f"Address: {attraction_details.get('formatted_address', 'Not available')}\n"
details_str += f"Rating: {attraction_details.get('rating', 'Not rated')} ({attraction_details.get('user_ratings_total', 0)} reviews)\n"
if "reviews" in attraction_details and attraction_details["reviews"]:
details_str += f"Sample review: \"{attraction_details['reviews'][0]['text']}\"\n"
if "opening_hours" in attraction_details and "weekday_text" in attraction_details["opening_hours"]:
details_str += "Opening hours:\n"
for hours in attraction_details["opening_hours"]["weekday_text"]:
details_str += f"- {hours}\n"
if "website" in attraction_details:
details_str += f"Website: {attraction_details['website']}\n"
attraction_context += details_str
# Add this context to the messages
messages.append({"role": "system", "content": f"Use this location information in your response: {attraction_context}"})
# If there's a current location set, add it to the context even if not asking for attractions
elif current_location and google_api_key and not location:
# Add a note about the current location setting
messages.append({
"role": "system",
"content": f"The user has set their current location to {current_location}. " +
"Consider this when responding, especially for questions about 'here', 'local', or nearby attractions."
})
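    # Location context is injected as extra system messages so the model treats
    # it as grounding data rather than as part of the user's words.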
# Use RAG if available, otherwise use the standard OpenAI API
if retrieval_chain and latest_user_message:
try:
rag_response = retrieval_chain.invoke({
"question": latest_user_message,
"chat_history": chat_history[:-1] if chat_history else []
})
reply = rag_response["answer"]
print(reply)
except Exception as e:
print(f"Error using RAG: {str(e)}")
# Fallback to standard API
response = openai.chat.completions.create(model=MODEL, messages=messages)
reply = response.choices[0].message.content
else:
# Standard OpenAI API
response = openai.chat.completions.create(model=MODEL, messages=messages)
reply = response.choices[0].message.content
history += [{"role":"assistant", "content":reply}]
talker(reply)
return history
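
# Speech input: the recorded clip is sent to OpenAI's Whisper API and the
# resulting transcript is fed into the chat pipeline like a typed message.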
def transcribe_audio(audio_path):
try:
# Check if audio_path is valid
if audio_path is None:
return "No audio detected. Please record again."
# Open the audio file
with open(audio_path, "rb") as audio_file:
transcript = openai.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
return transcript.text
except Exception as e:
return f"Error during transcription: {str(e)}"
##################Interface with Gradio##############################
theme = gr.themes.Soft(
primary_hue="blue",
secondary_hue="indigo",
neutral_hue="slate",
font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"]
)
# Load CSS from external file
with open('style.css', 'r') as f:
css = f.read()
# Store the current location globally to use in queries
current_location = None
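
# Rebuilding the vector store at runtime lets users drop new PDFs into the
# knowledge-base directory and pick them up without restarting the app.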
def refresh_knowledge_base():
"""Reload the knowledge base and update the retrieval chain."""
global vector_store, retrieval_chain
vector_store = load_knowledge_base()
if vector_store:
# Create retrieval chain
llm = ChatOpenAI(model=MODEL)
retrieval_chain = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
return_source_documents=False
)
return "Knowledge base refreshed successfully!"
else:
return "No PDF files found in the knowledge-base directory."
def set_location(location):
"""Set the current location for the assistant."""
global current_location
if not location or location.strip() == "":
return "Please enter a valid location."
# Verify the location exists using the Google Maps API
if google_api_key:
geocode_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={location}&key={google_api_key}"
try:
geocode_response = requests.get(geocode_url)
geocode_data = geocode_response.json()
if geocode_data["status"] != "OK" or len(geocode_data["results"]) == 0:
return f"Location not found: {location}. Please enter a valid location."
# Get the formatted location name
current_location = geocode_data["results"][0]["formatted_address"]
# Get preliminary attraction data for the location
attractions_data = search_attractions(current_location)
if "error" not in attractions_data and "attractions" in attractions_data:
num_attractions = len(attractions_data["attractions"])
return f"Location set to: {current_location}. Found {num_attractions} nearby attractions."
else:
return f"Location set to: {current_location}. No attractions data available."
except Exception as e:
current_location = location # Fall back to user input
return f"Location set to: {location}. Error verifying location: {str(e)}"
else:
current_location = location # No API key, just use the user input
return f"Location set to: {location}. (Google API not configured for verification)"
with gr.Blocks(theme=theme, css=css) as ui:
with gr.Column(elem_classes="container"):
gr.Markdown("# 🌍 Tourist Assistant", elem_classes="title")
gr.Markdown("Ask about any city, landmark, or destination around the world", elem_classes="subtitle")
        # A nested gr.Blocks() would render into a separate, never-launched app,
        # so the banner image is placed directly in this column instead.
        gr.Image("travel.jpg", show_label=False, height=150, container=False, interactive=False)
with gr.Column(elem_classes="chatbot-container"):
chatbot = gr.Chatbot(
height=400,
type="messages",
bubble_full_width=False,
show_copy_button=True,
elem_id="chatbox"
)
with gr.Row(elem_classes="mic-container"):
audio_input = gr.Audio(
type="filepath",
label="🎤 Hold the record button and ask your question",
sources=["microphone"],
streaming=False,
interactive=True,
autoplay=False,
show_download_button=False,
show_share_button=False,
elem_id="mic-button"
)
with gr.Row():
entry = gr.Textbox(
label="",
placeholder="Or type your question here or use the microphone below...",
container=False,
lines=2,
scale=10
)
with gr.Row():
with gr.Column(scale=3):
location_input = gr.Textbox(
label="Set Current Location",
placeholder="e.g., Paris, France or London, UK",
interactive=True
)
with gr.Column(scale=1):
location_btn = gr.Button("Set Location", variant="primary", size="sm")
with gr.Column(scale=1):
attractions_btn = gr.Button("Nearby Attractions", variant="secondary", size="sm")
with gr.Row():
with gr.Column(scale=1):
refresh_btn = gr.Button("🔄 Refresh Knowledge Base", variant="primary", size="sm")
refresh_status = gr.Textbox(label="Status", interactive=False)
with gr.Column(scale=1, elem_classes="clear-button"):
clear = gr.Button("Clear", variant="secondary", size="sm")
    def transcribe_and_submit(audio_path, history):
        # Take the live chat history as an input; reading chatbot.value here
        # would only return the component's initial value, not its current state.
        transcription = transcribe_audio(audio_path)
        history = history or []
        history += [{"role": "user", "content": transcription}]
        return transcription, history, None
    audio_input.stop_recording(
        fn=transcribe_and_submit,
        inputs=[audio_input, chatbot],
        outputs=[entry, chatbot, audio_input]
    ).then(
        chat, inputs=chatbot, outputs=[chatbot]
    )
def do_entry(message, history):
history += [{"role":"user", "content":message}]
return "", history
entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(
chat, inputs=chatbot, outputs=[chatbot]
)
clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)
refresh_btn.click(refresh_knowledge_base, inputs=None, outputs=refresh_status)
# Add location status to show the result
location_status = gr.Textbox(label="Location Status", interactive=False)
# Connect the location button to set the location
location_btn.click(
set_location,
inputs=location_input,
outputs=location_status
)
# Add a separate function to clear the input field
def clear_location_input():
return ""
location_btn.click(
clear_location_input,
inputs=None,
outputs=location_input
)
# Add a function to handle asking about nearby attractions
def ask_about_attractions(history):
global current_location
if not current_location:
history += [{"role":"user", "content":"Tell me about attractions near me"}]
history += [{"role":"assistant", "content":"You haven't set a location yet. Please use the 'Set Current Location' field above to set your location first."}]
return history
history += [{"role":"user", "content":f"What are some attractions to visit in {current_location}?"}]
return chat(history)
# Connect the attractions button to ask about attractions
attractions_btn.click(ask_about_attractions, inputs=chatbot, outputs=chatbot)
ui.launch(inbrowser=True)

Binary file not shown (image, 37 KiB).