Merge branch 'main' of github.com:ed-donner/llm_engineering

This commit is contained in:
Edward Donner
2025-04-05 10:01:06 -04:00
15 changed files with 5009 additions and 0 deletions

View File

@@ -0,0 +1,567 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c79dc33e-1a3b-4601-a8f2-219b7a9b6d88",
"metadata": {},
"source": [
"# Company Brochure - Relevant Links and Custom Tone\n",
"\n",
"Using GPT to generate a company brochure with the relevant links functionality and the ability to choose the desired tone."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e32f4aa7-6fc4-4dc9-8058-58e6a7f329c5",
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d1d65a21-bbba-44ff-a2be-85bf2055a493",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API Key set and good to go.\n"
]
}
],
"source": [
"# Read API keys from a .env file; override=True lets .env values replace variables already set\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
"\n",
"# Only the OpenAI key is checked here\n",
"key_status = \"OpenAI API Key set and good to go.\" if openai_api_key else \"OpenAI API Key not set. :(\"\n",
"print(key_status)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c5db63fe-5da8-496e-9b37-139598d600a7",
"metadata": {},
"outputs": [],
"source": [
"# Model name and the OpenAI client object used by the cells below\n",
"\n",
"gpt_model = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "535da52f-b280-48ce-aa8b-f82f9f9805d9",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"    \"\"\"\n",
"    A utility class to represent a Website that we have scraped, now with links\n",
"\n",
"    Attributes set by the constructor:\n",
"        url:   the address that was requested\n",
"        body:  raw bytes of the HTTP response\n",
"        title: text of the <title> tag, or \"No title found\"\n",
"        text:  text of <body> with script/style/img/input tags removed (\"\" if no <body>)\n",
"        links: every non-empty href found on an <a> tag, exactly as written in the page\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, url, timeout=30):\n",
"        \"\"\"\n",
"        Fetch `url` and parse it with BeautifulSoup.\n",
"\n",
"        timeout: seconds passed to requests.get, so a stalled server raises\n",
"                 an exception instead of hanging the notebook.\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers, timeout=timeout)\n",
"        self.body = response.content\n",
"        soup = BeautifulSoup(self.body, 'html.parser')\n",
"        # soup.title.string is None for an empty <title> or one that contains nested tags\n",
"        self.title = (soup.title.string if soup.title else None) or \"No title found\"\n",
"        if soup.body:\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\"\n",
"        links = [link.get('href') for link in soup.find_all('a')]\n",
"        self.links = [link for link in links if link]\n",
"\n",
"    def get_contents(self):\n",
"        \"\"\"Return the title and page text formatted as one block for use in a prompt.\"\"\"\n",
"        return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8d5757c4-95f4-4038-8ed4-8c81da5112b0",
"metadata": {},
"outputs": [],
"source": [
"# System prompt for the link-selection call. The example must itself be valid JSON,\n",
"# because it is the format the model is asked to copy.\n",
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
"    \"links\": [\n",
"        {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
"        {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
"    ]\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d5fd31ac-7c81-454a-a1dc-4c58bd3db246",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
"    \"\"\"Build the user prompt that lists every link found on `website` for the model to filter.\"\"\"\n",
"    intro = f\"Here is the list of links on the website of {website.url} - \"\n",
"    instructions = (\n",
"        \"please decide which of these are relevant web links for a brochure about the company, \"\n",
"        \"respond with the full https URL in JSON format. \"\n",
"        \"Do not include Terms of Service, Privacy, email links.\\n\"\n",
"    )\n",
"    heading = \"Links (some might be relative links):\\n\"\n",
"    return intro + instructions + heading + \"\\n\".join(website.links)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e8b67492-1ba4-4aad-a588-39116128fa18",
"metadata": {},
"outputs": [],
"source": [
"def gpt_get_links(url):\n",
"    \"\"\"Ask the model which links on the page at `url` belong in a brochure; return its JSON reply parsed into Python.\"\"\"\n",
"    page = Website(url)\n",
"    conversation = [\n",
"        {\"role\": \"system\", \"content\": link_system_prompt},\n",
"        {\"role\": \"user\", \"content\": get_links_user_prompt(page)},\n",
"    ]\n",
"    completion = openai.chat.completions.create(\n",
"        model=gpt_model,\n",
"        messages=conversation,\n",
"        response_format={\"type\": \"json_object\"},\n",
"    )\n",
"    return json.loads(completion.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e8846e7a-ace2-487e-a0a8-fccb389f2eb9",
"metadata": {},
"outputs": [],
"source": [
"# Builds the brochure source text: the landing page via Website.get_contents, plus every link GPT judged relevant.\n",
"\n",
"def get_all_details(url):\n",
"    \"\"\"\n",
"    Return the landing page contents followed by the contents of each relevant linked page.\n",
"\n",
"    The relevant links come from gpt_get_links(url). Each one is resolved against\n",
"    `url` before fetching, because the model may hand back a relative link.\n",
"    A linked page that cannot be fetched is reported and skipped so one dead link\n",
"    does not lose the whole brochure; a failure on the landing page still raises.\n",
"    \"\"\"\n",
"    from urllib.parse import urljoin\n",
"\n",
"    result = \"Landing page:\\n\"\n",
"    result += Website(url).get_contents()\n",
"    links = gpt_get_links(url)\n",
"    print(\"Found links:\", links)\n",
"    # .get: the model's JSON is not guaranteed to contain a \"links\" key\n",
"    for link in links.get(\"links\", []):\n",
"        page_url = urljoin(url, link[\"url\"])\n",
"        try:\n",
"            page_contents = Website(page_url).get_contents()\n",
"        except requests.RequestException as error:\n",
"            print(f\"Skipping {page_url}: {error}\")\n",
"            continue\n",
"        result += f\"\\n\\n{link['type']}\\n\"\n",
"        result += page_contents\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "18b42319-8342-4b9c-bef6-8b72acf92ab3",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url, max_chars=5_000):\n",
"    \"\"\"\n",
"    Build the user prompt for the brochure request.\n",
"\n",
"    company_name: name shown to the model\n",
"    url: landing page; the text returned by get_all_details(url) is appended\n",
"    max_chars: the finished prompt is cut to this many characters\n",
"    \"\"\"\n",
"    user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
"    # Adjacent literals instead of a backslash continuation, which would pull\n",
"    # the source indentation into the prompt text\n",
"    user_prompt += (\n",
"        \"Here are the contents of its landing page and other relevant pages; \"\n",
"        \"use this information to build a short brochure of the company in markdown.\\n\"\n",
"    )\n",
"    user_prompt += get_all_details(url)\n",
"    return user_prompt[:max_chars]  # Truncate if longer than max_chars"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d7748293-a616-41de-93cb-89f65cc5c73d",
"metadata": {},
"outputs": [],
"source": [
"# Let's create a call that streams back results\n",
"# If you'd like a refresher on Generators (the \"yield\" keyword),\n",
"# Please take a look at the Intermediate Python notebook in week1 folder.\n",
"\n",
"def stream_brochure(company_name, url, tone):\n",
"    \"\"\"\n",
"    Generator for the Gradio UI: yields the brochure text accumulated so far\n",
"    each time a new chunk arrives from the model.\n",
"\n",
"    company_name, url: passed on to get_brochure_user_prompt\n",
"    tone: free text from the UI; when blank, no tone instruction is sent\n",
"    \"\"\"\n",
"    tone = (tone or \"\").strip().lower()\n",
"    # A blank Tone box would otherwise produce the instruction \"use a  tone\"\n",
"    tone_instruction = f\", and use a {tone} tone throughout the brochure.\" if tone else \".\"\n",
"\n",
"    system_message = (\n",
"        \"You are an assistant that analyzes the content of several relevant pages from a company website \"\n",
"        \"and creates a short brochure about the company for prospective customers, investors, and recruits. \"\n",
"        \"Include details of company culture, customers and careers/jobs if you have the information. \"\n",
"        \"Respond in markdown\" + tone_instruction\n",
"    )\n",
"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
"    ]\n",
"    stream = openai.chat.completions.create(\n",
"        model=gpt_model,\n",
"        messages=messages,\n",
"        stream=True\n",
"    )\n",
"    result = \"\"\n",
"    for chunk in stream:\n",
"        # delta.content is None on chunks that carry no text\n",
"        result += chunk.choices[0].delta.content or \"\"\n",
"        yield result"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "15222832-06e0-4452-a8e1-59b9b1755488",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Running on local URL: http://127.0.0.1:7860\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found links: {'links': [{'type': 'about page', 'url': 'https://www.snowflake.com/about/events/'}, {'type': 'company page', 'url': 'https://www.snowflake.com/en/company/overview/about-snowflake/'}, {'type': 'company leadership page', 'url': 'https://www.snowflake.com/en/company/overview/leadership-and-board/'}, {'type': 'careers page', 'url': 'https://careers.snowflake.com/us/en'}, {'type': 'company ESG page', 'url': 'https://www.snowflake.com/en/company/overview/esg/'}, {'type': 'company ventures page', 'url': 'https://www.snowflake.com/en/company/overview/snowflake-ventures/'}, {'type': 'end data disparity page', 'url': 'https://www.snowflake.com/en/company/overview/end-data-disparity/'}]}\n",
"Found links: {'links': [{'type': 'about page', 'url': 'https://www.snowflake.com/about/events/'}, {'type': 'about page', 'url': 'https://www.snowflake.com/company/overview/about-snowflake/'}, {'type': 'leadership page', 'url': 'https://www.snowflake.com/company/overview/leadership-and-board/'}, {'type': 'careers page', 'url': 'https://careers.snowflake.com/us/en'}, {'type': 'investor relations', 'url': 'https://investors.snowflake.com/overview/default.aspx'}, {'type': 'ESG page', 'url': 'https://www.snowflake.com/company/overview/esg/'}, {'type': 'snowflake ventures', 'url': 'https://www.snowflake.com/company/overview/snowflake-ventures/'}, {'type': 'end data disparity', 'url': 'https://www.snowflake.com/company/overview/end-data-disparity/'}]}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 464, in _make_request\n",
" self._validate_conn(conn)\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 1093, in _validate_conn\n",
" conn.connect()\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connection.py\", line 741, in connect\n",
" sock_and_verified = _ssl_wrap_socket_and_match_hostname(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connection.py\", line 920, in _ssl_wrap_socket_and_match_hostname\n",
" ssl_sock = ssl_wrap_socket(\n",
" ^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/ssl_.py\", line 460, in ssl_wrap_socket\n",
" ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/ssl_.py\", line 504, in _ssl_wrap_socket_impl\n",
" return ssl_context.wrap_socket(sock, server_hostname=server_hostname)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 517, in wrap_socket\n",
" return self.sslsocket_class._create(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 1104, in _create\n",
" self.do_handshake()\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 1382, in do_handshake\n",
" self._sslobj.do_handshake()\n",
"ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 787, in urlopen\n",
" response = self._make_request(\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 488, in _make_request\n",
" raise new_e\n",
"urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)\n",
"\n",
"The above exception was the direct cause of the following exception:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/adapters.py\", line 667, in send\n",
" resp = conn.urlopen(\n",
" ^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 841, in urlopen\n",
" retries = retries.increment(\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/retry.py\", line 519, in increment\n",
" raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='petrofac.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)')))\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py\", line 625, in process_events\n",
" response = await route_utils.call_process_api(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py\", line 322, in call_process_api\n",
" output = await app.get_blocks().process_api(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py\", line 2103, in process_api\n",
" result = await self.call_function(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py\", line 1662, in call_function\n",
" prediction = await utils.async_iteration(iterator)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 735, in async_iteration\n",
" return await anext(iterator)\n",
" ^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 729, in __anext__\n",
" return await anyio.to_thread.run_sync(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/to_thread.py\", line 56, in run_sync\n",
" return await get_async_backend().run_sync_in_worker_thread(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/_backends/_asyncio.py\", line 2461, in run_sync_in_worker_thread\n",
" return await future\n",
" ^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/_backends/_asyncio.py\", line 962, in run\n",
" result = context.run(func, *args)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 712, in run_sync_iterator_async\n",
" return next(iterator)\n",
" ^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 873, in gen_wrapper\n",
" response = next(iterator)\n",
" ^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/601932735.py\", line 15, in stream_brochure\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/3764629295.py\", line 6, in get_brochure_user_prompt\n",
" user_prompt += get_all_details(url)\n",
" ^^^^^^^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/2913862724.py\", line 5, in get_all_details\n",
" result += Website(url).get_contents()\n",
" ^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/1579423502.py\", line 15, in __init__\n",
" response = requests.get(url, headers=headers)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/api.py\", line 73, in get\n",
" return request(\"get\", url, params=params, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/api.py\", line 59, in request\n",
" return session.request(method=method, url=url, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/sessions.py\", line 589, in request\n",
" resp = self.send(prep, **send_kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/sessions.py\", line 703, in send\n",
" r = adapter.send(request, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/adapters.py\", line 698, in send\n",
" raise SSLError(e, request=request)\n",
"requests.exceptions.SSLError: HTTPSConnectionPool(host='petrofac.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)')))\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 464, in _make_request\n",
" self._validate_conn(conn)\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 1093, in _validate_conn\n",
" conn.connect()\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connection.py\", line 741, in connect\n",
" sock_and_verified = _ssl_wrap_socket_and_match_hostname(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connection.py\", line 920, in _ssl_wrap_socket_and_match_hostname\n",
" ssl_sock = ssl_wrap_socket(\n",
" ^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/ssl_.py\", line 460, in ssl_wrap_socket\n",
" ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/ssl_.py\", line 504, in _ssl_wrap_socket_impl\n",
" return ssl_context.wrap_socket(sock, server_hostname=server_hostname)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 517, in wrap_socket\n",
" return self.sslsocket_class._create(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 1104, in _create\n",
" self.do_handshake()\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/ssl.py\", line 1382, in do_handshake\n",
" self._sslobj.do_handshake()\n",
"ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 787, in urlopen\n",
" response = self._make_request(\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 488, in _make_request\n",
" raise new_e\n",
"urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)\n",
"\n",
"The above exception was the direct cause of the following exception:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/adapters.py\", line 667, in send\n",
" resp = conn.urlopen(\n",
" ^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/connectionpool.py\", line 841, in urlopen\n",
" retries = retries.increment(\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/urllib3/util/retry.py\", line 519, in increment\n",
" raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='petrofac.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)')))\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py\", line 625, in process_events\n",
" response = await route_utils.call_process_api(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py\", line 322, in call_process_api\n",
" output = await app.get_blocks().process_api(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py\", line 2103, in process_api\n",
" result = await self.call_function(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py\", line 1662, in call_function\n",
" prediction = await utils.async_iteration(iterator)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 735, in async_iteration\n",
" return await anext(iterator)\n",
" ^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 729, in __anext__\n",
" return await anyio.to_thread.run_sync(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/to_thread.py\", line 56, in run_sync\n",
" return await get_async_backend().run_sync_in_worker_thread(\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/_backends/_asyncio.py\", line 2461, in run_sync_in_worker_thread\n",
" return await future\n",
" ^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/anyio/_backends/_asyncio.py\", line 962, in run\n",
" result = context.run(func, *args)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 712, in run_sync_iterator_async\n",
" return next(iterator)\n",
" ^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py\", line 873, in gen_wrapper\n",
" response = next(iterator)\n",
" ^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/601932735.py\", line 15, in stream_brochure\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/3764629295.py\", line 6, in get_brochure_user_prompt\n",
" user_prompt += get_all_details(url)\n",
" ^^^^^^^^^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/2913862724.py\", line 5, in get_all_details\n",
" result += Website(url).get_contents()\n",
" ^^^^^^^^^^^^\n",
" File \"/var/folders/yc/m81x80gn66j4fbm15pk5gmfr0000gn/T/ipykernel_39727/1579423502.py\", line 15, in __init__\n",
" response = requests.get(url, headers=headers)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/api.py\", line 73, in get\n",
" return request(\"get\", url, params=params, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/api.py\", line 59, in request\n",
" return session.request(method=method, url=url, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/sessions.py\", line 589, in request\n",
" resp = self.send(prep, **send_kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/sessions.py\", line 703, in send\n",
" r = adapter.send(request, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/anaconda3/envs/llms/lib/python3.11/site-packages/requests/adapters.py\", line 698, in send\n",
" raise SSLError(e, request=request)\n",
"requests.exceptions.SSLError: HTTPSConnectionPool(host='petrofac.com', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1006)')))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found links: {'links': [{'type': 'about page', 'url': 'https://www.petrofac.com/who-we-are/'}, {'type': 'what we do page', 'url': 'https://www.petrofac.com/who-we-are/what-we-do/'}, {'type': 'careers page', 'url': 'https://www.petrofac.com/careers/'}, {'type': 'our structure page', 'url': 'https://www.petrofac.com/who-we-are/our-structure/'}, {'type': 'energy transition page', 'url': 'https://www.petrofac.com/who-we-are/energy-transition/'}, {'type': 'sustainability and ESG page', 'url': 'https://www.petrofac.com/who-we-are/sustainability-and-esg/'}, {'type': 'investor relations page', 'url': 'https://www.petrofac.com/investors/'}, {'type': 'services page', 'url': 'https://www.petrofac.com/services/'}, {'type': 'where we operate page', 'url': 'https://www.petrofac.com/where-we-operate/'}]}\n"
]
}
],
"source": [
"# The three text boxes map, in order, onto stream_brochure(company_name, url, tone)\n",
"brochure_inputs = [\n",
"    gr.Textbox(label=\"Company name:\"),\n",
"    gr.Textbox(label=\"Landing page URL including http:// or https://\"),\n",
"    gr.Textbox(label=\"Tone:\"),\n",
"]\n",
"brochure_outputs = [gr.Markdown(label=\"Brochure:\")]\n",
"\n",
"view = gr.Interface(\n",
"    fn=stream_brochure,\n",
"    inputs=brochure_inputs,\n",
"    outputs=brochure_outputs,\n",
"    flagging_mode=\"never\",\n",
")\n",
"view.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70d6398c-21dd-44f8-ba7d-0204414dffa0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,275 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ddfa9ae6-69fe-444a-b994-8c4c5970a7ec",
"metadata": {},
"source": [
"# Project - Airline AI Assistant\n",
"\n",
"We'll now bring together what we've learned to make an AI Customer Support assistant for an Airline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b50bbe2-c0b1-49c3-9a5c-1ba7efa2bcb4",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "747e8786-9da8-4342-b6c9-f5f69c2e22ae",
"metadata": {},
"outputs": [],
"source": [
"# Initialization\n",
"\n",
"load_dotenv(override=True)\n",
"\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"if openai_api_key:\n",
"    # Only the first 8 characters are shown, not the whole key\n",
"    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
"    print(\"OpenAI API Key not set\")\n",
"\n",
"MODEL = \"gpt-4o-mini\"\n",
"openai = OpenAI()\n",
"\n",
"# As an alternative, if you'd like to use Ollama instead of OpenAI\n",
"# Check that Ollama is running for you locally (see week1/day2 exercise) then uncomment these next 2 lines\n",
"# MODEL = \"llama3.2\"\n",
"# openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a521d84-d07c-49ab-a0df-d6451499ed97",
"metadata": {},
"outputs": [],
"source": [
"# System prompt for the FlightAI assistant, written as adjacent string literals\n",
"system_message = (\n",
"    \"You are a helpful assistant for an Airline called FlightAI. \"\n",
"    \"Give short, courteous answers, no more than 1 sentence. \"\n",
"    \"Always be accurate. If you don't know the answer, say so.\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61a2a15d-b559-4844-b377-6bd5cb4949f6",
"metadata": {},
"outputs": [],
"source": [
"# This function looks rather simpler than the one from my video, because we're taking advantage of the latest Gradio updates\n",
"\n",
"def chat(message, history):\n",
"    \"\"\"Send the system prompt, the earlier turns and the new user message to the model; return the reply text.\"\"\"\n",
"    system_turn = {\"role\": \"system\", \"content\": system_message}\n",
"    user_turn = {\"role\": \"user\", \"content\": message}\n",
"    conversation = [system_turn, *history, user_turn]\n",
"    completion = openai.chat.completions.create(model=MODEL, messages=conversation)\n",
"    return completion.choices[0].message.content\n",
"\n",
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "markdown",
"id": "36bedabf-a0a7-4985-ad8e-07ed6a55a3a4",
"metadata": {},
"source": [
"## Tools\n",
"\n",
"Tools are an incredibly powerful feature provided by the frontier LLMs.\n",
"\n",
"With tools, you can write a function, and have the LLM call that function as part of its response.\n",
"\n",
"Sounds almost spooky.. we're giving it the power to run code on our machine?\n",
"\n",
"Well, kinda."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0696acb1-0b05-4dc2-80d5-771be04f1fb2",
"metadata": {},
"outputs": [],
"source": [
"# Let's start by making a useful function\n",
"\n",
"ticket_prices = {\"london\": \"$799\", \"paris\": \"$899\", \"tokyo\": \"$1400\", \"berlin\": \"$499\"}\n",
"\n",
"def get_ticket_price(destination_city):\n",
"    \"\"\"\n",
"    Look up the ticket price for a city, ignoring case and surrounding spaces.\n",
"\n",
"    Returns the price string from ticket_prices, or \"Unknown\" when the city is\n",
"    not listed or no city was supplied (None or empty).\n",
"    \"\"\"\n",
"    print(f\"Tool get_ticket_price called for {destination_city}\")\n",
"    # The caller may pass None when the argument is absent, so guard before .lower()\n",
"    city = (destination_city or \"\").strip().lower()\n",
"    return ticket_prices.get(city, \"Unknown\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80ca4e09-6287-4d3f-997d-fa6afbcf6c85",
"metadata": {},
"outputs": [],
"source": [
"get_ticket_price(\"Berlin\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4afceded-7178-4c05-8fa6-9f2085e6a344",
"metadata": {},
"outputs": [],
"source": [
"# The function is described to the model with a dictionary of this particular shape:\n",
"\n",
"price_function = dict(\n",
"    name=\"get_ticket_price\",\n",
"    description=\"Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'\",\n",
"    parameters=dict(\n",
"        type=\"object\",\n",
"        properties=dict(\n",
"            destination_city=dict(\n",
"                type=\"string\",\n",
"                description=\"The city that the customer wants to travel to\",\n",
"            ),\n",
"        ),\n",
"        required=[\"destination_city\"],\n",
"        additionalProperties=False,\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdca8679-935f-4e7f-97e6-e71a4d4f228c",
"metadata": {},
"outputs": [],
"source": [
"# Wrap the function description as a tool entry and put it in the tools list:\n",
"\n",
"price_tool = {\"type\": \"function\", \"function\": price_function}\n",
"tools = [price_tool]"
]
},
{
"cell_type": "markdown",
"id": "c3d3554f-b4e3-4ce7-af6f-68faa6dd2340",
"metadata": {},
"source": [
"## Getting OpenAI to use our Tool\n",
"\n",
"There's some fiddly stuff to allow OpenAI \"to call our tool\"\n",
"\n",
"What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.\n",
"\n",
"Here's how the new chat function looks:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad32321f-083a-4462-a6d6-7bb3b0f5d10a",
"metadata": {},
"outputs": [],
"source": [
"# We have to write that function handle_tool_call:\n",
"\n",
"def handle_tool_call(message):\n",
"    \"\"\"\n",
"    Run every tool call requested in `message` and return one \"tool\" reply per call.\n",
"\n",
"    Each reply carries the tool_call_id of the call it answers. A call to a tool\n",
"    name this notebook does not implement is answered with an error payload\n",
"    rather than dropped, so no requested call is left without a reply.\n",
"    Returns an empty list when `message.tool_calls` is None or empty.\n",
"    \"\"\"\n",
"    responses = []\n",
"    for tool_call in message.tool_calls or []:\n",
"        if tool_call.function.name == \"get_ticket_price\":\n",
"            arguments = json.loads(tool_call.function.arguments)\n",
"            city = arguments.get('destination_city')\n",
"            price = get_ticket_price(city)\n",
"            content = json.dumps({\"destination_city\": city,\"price\": price})\n",
"        else:\n",
"            content = json.dumps({\"error\": f\"Unknown tool: {tool_call.function.name}\"})\n",
"        responses.append({\n",
"            \"role\": \"tool\",\n",
"            \"content\": content,\n",
"            \"tool_call_id\": tool_call.id\n",
"        })\n",
"    return responses"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce9b0744-9c78-408d-b9df-9f6fd9ed78cf",
"metadata": {},
"outputs": [],
"source": [
"def chat(message, history):\n",
"    \"\"\"\n",
"    Chat handler with tool support.\n",
"\n",
"    Sends the conversation with the tools list. If the model finishes with\n",
"    \"tool_calls\", the requested tools are run, their results are appended, and\n",
"    the model is called once more. Returns the text of the final reply.\n",
"    \"\"\"\n",
"    conversation = [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        *history,\n",
"        {\"role\": \"user\", \"content\": message},\n",
"    ]\n",
"    completion = openai.chat.completions.create(model=MODEL, messages=conversation, tools=tools)\n",
"\n",
"    # Tool usage\n",
"    first_choice = completion.choices[0]\n",
"    if first_choice.finish_reason == \"tool_calls\":\n",
"        tool_request = first_choice.message\n",
"        tool_replies = handle_tool_call(tool_request)\n",
"        conversation.append(tool_request)  # That's the assistant asking us to run a tool\n",
"        conversation.extend(tool_replies)  # That's the result of the tool calls\n",
"        completion = openai.chat.completions.create(model=MODEL, messages=conversation)\n",
"\n",
"    return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4be8a71-b19e-4c2f-80df-f59ff2661f14",
"metadata": {},
"outputs": [],
"source": [
"gr.ChatInterface(fn=chat, type=\"messages\").launch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8dc18486-4d6b-4cbf-a6b8-16d08d7c4f54",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,167 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"Import libraries as needed and keep your Gemini API key ready (the code below reads it from the Colab secret named `gemini_api`)."
],
"metadata": {
"id": "2UAcHYzT6ikw"
}
},
{
"cell_type": "code",
"source": [
"#!pip install gradio"
],
"metadata": {
"id": "XW0IY4xK6JZ1"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "dwoPNMMP4ZSh"
},
"outputs": [],
"source": [
"from google import genai\n",
"from google.genai import types\n",
"from google.colab import userdata\n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"def get_trip_itinerary(budget: int) -> str:\n",
" \"\"\"\n",
" Returns a trip itinerary based on the given budget.\n",
" \"\"\"\n",
" itinerary_dict: Dict[int, str] = {\n",
" 500: \"Paris: 3-day budget trip covering Eiffel Tower, Louvre, and Seine River Cruise.\",\n",
" 1000: \"Tokyo: 5-day adventure covering Shibuya, Akihabara, Mount Fuji day trip.\",\n",
" 2000: \"New York: 7-day luxury stay covering Times Square, Broadway show, and helicopter tour.\",\n",
" 3000: \"Dubai: 7-day ultra-luxury trip with Burj Khalifa VIP tour, desert safari, and yacht cruise.\",\n",
" }\n",
"\n",
" return itinerary_dict.get(budget, \"No itinerary found for this budget. Try another amount!\")\n"
],
"metadata": {
"id": "cnYD07T24ueV"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# `genai`, `types` and `userdata` come from the import cell at the top of the notebook.\n",
"\n",
"# Pass get_trip_itinerary as a tool in the generation config.\n",
"config = types.GenerateContentConfig(tools=[get_trip_itinerary])\n",
"\n",
"# The API key is read from the Colab secret named 'gemini_api'.\n",
"client = genai.Client(api_key=userdata.get('gemini_api'))\n",
"\n",
"# One-off request to check that the client and tool configuration work.\n",
"response = client.models.generate_content(\n",
"    model='gemini-2.0-flash',\n",
"    config=config,\n",
"    contents='Based on the user budget suggest trip itinerary'\n",
")\n",
"\n",
"# Show the reply as the cell output instead of discarding it.\n",
"response.text\n"
],
"metadata": {
"id": "3WRUXvD45VFC"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import gradio as gr\n",
"\n",
"# Chat function using Gemini\n",
"chat = client.chats.create(model='gemini-2.0-flash', config=config)\n",
"\n",
"def chat_with_ai(user_input: str):\n",
"    \"\"\"Send `user_input` through the module-level Gemini `chat` session and return the reply's text.\"\"\"\n",
"    return chat.send_message(user_input).text\n",
"\n",
"# Gradio Chat Interface\n",
"demo = gr.Interface(fn=chat_with_ai, inputs=\"text\", outputs=\"text\", title=\"AI Trip Planner\")\n",
"\n",
"demo.launch()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 645
},
"id": "5fE700z96DHs",
"outputId": "3e35423c-8b2b-4868-8113-00d9d3a7a2ba"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
"\n",
"Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
"* Running on public URL: https://079a23f363400da700.gradio.live\n",
"\n",
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"<div><iframe src=\"https://079a23f363400da700.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": []
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "XC9zzq8X5u8m"
},
"execution_count": null,
"outputs": []
}
]
}