Updated .gitignore

This commit is contained in:
Edward Donner
2025-09-16 18:52:33 -04:00
221 changed files with 53467 additions and 2 deletions

4
.gitignore vendored
View File

@@ -190,3 +190,7 @@ nohup.out
scraper_cache/
challenge/
# WandB local sync data.
wandb/

View File

@@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"## Web2Quiz: Generate a quiz from webpage content."
],
"metadata": {
"id": "n3vd295elWxh"
},
"id": "n3vd295elWxh"
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988"
},
"outputs": [],
"source": [
"#.env upload\n",
"from google.colab import files\n",
"uploaded = files.upload()"
]
},
{
"cell_type": "code",
"source": [
"# `pip install dotenv` fetches an unrelated, abandoned package; `load_dotenv` is provided by python-dotenv.\n",
"!pip install python-dotenv\n"
],
"metadata": {
"id": "VTpN_jVbMKuk"
},
"id": "VTpN_jVbMKuk",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import os\n",
"from dotenv import load_dotenv"
],
"metadata": {
"id": "twYi9eJwL2h1"
},
"id": "twYi9eJwL2h1",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENROUTER_KEY')\n",
"\n",
"# Check the key\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"# elif not api_key.startswith(\"sk-proj-\"):\n",
"# print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
],
"metadata": {
"id": "NRnUTEkZL2eZ"
},
"id": "NRnUTEkZL2eZ",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install openai"
],
"metadata": {
"id": "RRuKJ_pzL2be"
},
"id": "RRuKJ_pzL2be",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install requests beautifulsoup4\n",
"!pip install selenium"
],
"metadata": {
"id": "DWsPpdjOVPTW"
},
"id": "DWsPpdjOVPTW",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from bs4 import BeautifulSoup\n",
"import requests\n",
"from tempfile import mkdtemp\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.options import Options\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.common.by import By\n",
"\n",
"class Website:\n",
" def __init__(self, url, use_selenium=False):\n",
" \"\"\"\n",
" Create Website object from the given URL.\n",
" If use_selenium=True, fetch page with Selenium.\n",
" Otherwise, use requests + BeautifulSoup.\n",
" \"\"\"\n",
" self.url = url\n",
" self.title = \"\"\n",
" self.text = \"\"\n",
" self.use_selenium = use_selenium\n",
"\n",
" if self.use_selenium:\n",
" html = self._fetch_with_selenium()\n",
" else:\n",
" html = self._fetch_with_requests()\n",
"\n",
" if not html:\n",
" self.title = \"Error fetching page\"\n",
" self.text = \"Could not retrieve HTML content.\"\n",
" return\n",
"\n",
" soup = BeautifulSoup(html, \"html.parser\")\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
"\n",
" # content_div = soup.find('div', id='content')\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\", \"header\", \"footer\", \"nav\", \"aside\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"No body tag found in the HTML.\"\n",
"\n",
" # Basic html scrapper\n",
" def _fetch_with_requests(self):\n",
" \"\"\"Fetch HTML using requests.\"\"\"\n",
" try:\n",
" headers = {\"User-Agent\": \"Mozilla/5.0\"}\n",
" response = requests.get(self.url, headers=headers, timeout=10)\n",
" response.raise_for_status()\n",
" return response.text\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"Error fetching with requests: {e}\")\n",
" return None\n",
"\n",
" # Dynamic html scrapper\n",
" def _fetch_with_selenium(self):\n",
" \"\"\"Fetch HTML using Selenium with improved options.\"\"\"\n",
" options = Options()\n",
" options.add_argument(\"--no-sandbox\")\n",
" options.add_argument(\"--disable-dev-shm-usage\")\n",
" options.add_argument(\"--headless\")\n",
" options.add_argument(f\"--user-data-dir={mkdtemp()}\")\n",
"\n",
" driver = None\n",
" try:\n",
" driver = webdriver.Chrome(options=options)\n",
" driver.get(self.url)\n",
"\n",
" WebDriverWait(driver, 10).until(\n",
" EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
" )\n",
"\n",
" html = driver.page_source\n",
" return html\n",
" except Exception as e:\n",
" print(f\"An error occurred during Selenium fetch: {e}\")\n",
" return None\n",
" finally:\n",
" if driver:\n",
" driver.quit()\n",
"\n"
],
"metadata": {
"id": "PzBP0tXXcrP-"
},
"id": "PzBP0tXXcrP-",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"site1 = Website(\"https://en.wikipedia.org/wiki/Integration_testing\", use_selenium=False)\n",
"print(\"Title:\", site1.title)\n",
"print(\"Text preview:\", site1.text[:200])\n",
"\n",
"site2 = Website(\"https://www.tpointtech.com/java-for-loop\", use_selenium=True)\n",
"print(\"Title:\", site2.title)\n",
"print(\"Text preview:\", site2.text[:200])"
],
"metadata": {
"id": "vsNmh5b5c6Gq"
},
"id": "vsNmh5b5c6Gq",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from openai import OpenAI  # was referenced below but never imported anywhere in this notebook\n",
"\n",
"# Step 1: Create your prompts\n",
"system_prompt = f\"You are a MCQ quiz generator. Analyze the provided TEXT and filter CONTENT relevant to {site1.title}. Then based on the relevant CONTENT generate 10 MCQs. List all correct options at the end.\"\n",
"user_prompt = f\"Below is provided TEXT : \\n{site1.text}\"\n",
"\n",
"# Step 2: Make the messages list\n",
"messages = [\n",
"    {\"role\": \"system\", \"content\": system_prompt},\n",
"    {\"role\": \"user\", \"content\": user_prompt}\n",
"]\n",
"\n",
"# Step 3: Create an OpenAI-compatible client pointed at OpenRouter\n",
"openai = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=api_key)\n",
"\n",
"# Step 4: print the result\n",
"response = openai.chat.completions.create(model=\"qwen/qwen2.5-vl-72b-instruct:free\", messages=messages)\n",
"print(response.choices[0].message.content)"
],
"metadata": {
"collapsed": true,
"id": "BYdc1w70QFD2"
},
"id": "BYdc1w70QFD2",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from openai import OpenAI  # was referenced below but never imported anywhere in this notebook\n",
"\n",
"# Step 1: Create your prompts\n",
"system_prompt = f\"You are a MCQ quiz generator. Analyze the provided TEXT and filter CONTENT relevant to {site2.title}. Then based on the relevant CONTENT generate 10 MCQs. List all correct options at the end.\"\n",
"user_prompt = f\"Below is provided TEXT : \\n{site2.text}\"\n",
"\n",
"# Step 2: Make the messages list\n",
"messages = [\n",
"    {\"role\": \"system\", \"content\": system_prompt},\n",
"    {\"role\": \"user\", \"content\": user_prompt}\n",
"]\n",
"\n",
"# Step 3: Create an OpenAI-compatible client pointed at OpenRouter\n",
"openai = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=api_key)\n",
"\n",
"# Step 4: print the result\n",
"response = openai.chat.completions.create(model=\"qwen/qwen2.5-vl-72b-instruct:free\", messages=messages)\n",
"print(response.choices[0].message.content)"
],
"metadata": {
"id": "Rv8vxFHtQFBm"
},
"id": "Rv8vxFHtQFBm",
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "o5tIkQ95_2Hc"
},
"id": "o5tIkQ95_2Hc",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
},
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2a5df086",
"metadata": {},
"source": [
"# If anyone is interested in this idea and wants to contribute, please let me know and share your idea/code\n"
]
},
{
"cell_type": "markdown",
"id": "3b0d5f6e",
"metadata": {},
"source": [
"*IDEA* - For visually impaired individuals, daily life often presents numerous obstacles that many of us take for granted. While tools like Braille and guide dogs offer some support, they do not fully address the limitations faced in navigating the world. With over 43.3 million blind people globally, there is a pressing need for more inclusive technologies that help break these barriers. This project aims to do more than assist with daily tasks; it seeks to empower individuals to engage meaningfully with their environment. By providing real-time, contextually accurate captions, this system allows them to experience the world around them, feel less isolated, and regain a sense of autonomy. Beyond just aiding navigation, it provides a bridge to connection—helping them feel more alive, present, and capable. This project is not just about overcoming limitations; it's about enriching lives and enabling a deeper, fuller interaction with the world, fostering a sense of belonging and independence.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"import base64\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
"metadata": {},
"outputs": [],
"source": [
"def encode_image(image_path):\n",
" with open(image_path, 'rb') as f:\n",
" return base64.b64encode(f.read()).decode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53cca1fa-6db2-4fe4-8990-ffd98423964a",
"metadata": {},
"outputs": [],
"source": [
"# image_path = r\"C:\\Users\\LAKSHYA\\OneDrive\\Pictures\\Camera Roll\\WIN_20250614_02_46_47_Pro.jpg\"\n",
"# image_base64 = encode_image(image_path)\n",
"# print(image_base64[:100]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
"metadata": {},
"outputs": [],
"source": [
"image_list = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8801a8-0c30-4199-a334-587096e6edeb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
"metadata": {},
"outputs": [],
"source": [
"def put_image():\n",
" global image_list\n",
" user_input_image = input(\"Enter image path or press enter to skip: \").strip()\n",
" \n",
" if not user_input_image:\n",
" print(\"No image inserted\")\n",
" return image_list\n",
"\n",
" image_path = os.path.normpath(user_input_image)\n",
" \n",
" if not os.path.exists(image_path):\n",
" print(\"Image path not found! Try again or enter to leave blank\")\n",
" return put_image() # Continue to allow more inputs\n",
" \n",
"\n",
"\n",
"\n",
" \n",
" image_base64 = encode_image(image_path)\n",
" image_list.append(image_base64)\n",
" \n",
" # Detect file extension for MIME type\n",
" # ext = os.path.splitext(image_path)[-1].lower()\n",
" # mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else 'image/png' # Extend if needed\n",
"\n",
"\n",
" return image_list\n",
" \n",
" # return f\"data:{mime_type};base64,{image_base64[:100]}\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
"metadata": {},
"outputs": [],
"source": [
"prompt= (\"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
" \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
" \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
" \"Be vivid and precise, as if you are painting a picture with words. \"\n",
" \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
" \"If the user includes a specific prompt, prioritize that in your description.)\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29494db0-4770-4689-9904-8eebc4390e7c",
"metadata": {},
"outputs": [],
"source": [
"def put_prompt():\n",
" global prompt\n",
" user_input = input(\"Put new prompt: \")\n",
" if not user_input:\n",
" print(\"please enter a prompt\")\n",
" return put_prompt()\n",
" prompt += \"\\nUser: \" + user_input\n",
" return prompt\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
"metadata": {},
"outputs": [],
"source": [
"def image_description():\n",
"    \"\"\"Collect an image and a user prompt, stream a LLaVA description, and record the turn in `prompt`.\"\"\"\n",
"    global prompt\n",
"\n",
"    put_image()\n",
"    if not image_list:\n",
"        return \"No images available. Skipping...\"\n",
"\n",
"    user_prompt = put_prompt()\n",
"    full_answer = \"\"\n",
"\n",
"    for chunk in ollama.generate(\n",
"        model='llava:7b-v1.6',\n",
"        prompt=user_prompt,\n",
"        images=image_list,\n",
"        stream=True\n",
"    ):\n",
"        content = chunk.get(\"response\", \"\")\n",
"        # Bug fix: the label used to be printed before *every* streamed chunk,\n",
"        # corrupting the output; the caller prints the labelled final answer.\n",
"        print(content, end=\"\", flush=True)\n",
"        full_answer += content\n",
"\n",
"    # Append this exchange to the running conversation history\n",
"    prompt += \"\\nUser: \" + user_prompt + \"\\nAssistant: \" + full_answer\n",
"    return full_answer\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
"metadata": {},
"outputs": [],
"source": [
"def call_llava():\n",
" image_list.clear()\n",
" for i in range(5):\n",
" print(f\"\\n Iteration {i+1}\")\n",
" answer = image_description()\n",
" print(\"\\n\\n Final Answer:\", answer)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15518865-6c59-4029-bc2d-42d313eb78bc",
"metadata": {},
"outputs": [],
"source": [
"call_llava()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c77bd493-f893-402e-b4e3-64854e9d2e19",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,433 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"import base64\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
"metadata": {},
"outputs": [],
"source": [
"def encode_image(image_path):\n",
" with open(image_path, 'rb') as f:\n",
" return base64.b64encode(f.read()).decode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53cca1fa-6db2-4fe4-8990-ffd98423964a",
"metadata": {},
"outputs": [],
"source": [
"# image_path = r\"C:\\Users\\LAKSHYA\\OneDrive\\Pictures\\Camera Roll\\WIN_20250614_02_46_47_Pro.jpg\"\n",
"# image_base64 = encode_image(image_path)\n",
"# print(image_base64[:100]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
"metadata": {},
"outputs": [],
"source": [
"image_list = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8801a8-0c30-4199-a334-587096e6edeb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
"metadata": {},
"outputs": [],
"source": [
"def put_image():\n",
" global image_list\n",
" user_input_image = input(\"Enter image path or press enter to skip: \").strip()\n",
" \n",
" if not user_input_image:\n",
" print(\"No image inserted\")\n",
" return image_list\n",
"\n",
" image_path = os.path.normpath(user_input_image)\n",
" \n",
" if not os.path.exists(image_path):\n",
" print(\"Image path not found! Try again or enter to leave blank\")\n",
" return put_image() # Continue to allow more inputs\n",
" \n",
"\n",
"\n",
"\n",
" \n",
" image_base64 = encode_image(image_path)\n",
" image_list.append(image_base64)\n",
" \n",
" # Detect file extension for MIME type\n",
" # ext = os.path.splitext(image_path)[-1].lower()\n",
" # mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else 'image/png' # Extend if needed\n",
"\n",
"\n",
" return image_list\n",
" \n",
" # return f\"data:{mime_type};base64,{image_base64[:100]}\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
"metadata": {},
"outputs": [],
"source": [
"prompt= (\"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
" \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
" \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
" \"Be vivid and precise, as if you are painting a picture with words. \"\n",
" \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
" \"If the user includes a specific prompt, prioritize that in your description.)\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29494db0-4770-4689-9904-8eebc4390e7c",
"metadata": {},
"outputs": [],
"source": [
"def put_prompt():\n",
" global prompt\n",
" user_input = input(\"Put new prompt: \")\n",
" if not user_input:\n",
" print(\"please enter a prompt\")\n",
" return put_prompt()\n",
" prompt += \"\\nUser: \" + user_input\n",
" return prompt\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
"metadata": {},
"outputs": [],
"source": [
"def image_description():\n",
"    \"\"\"Collect an image and a user prompt, stream a LLaVA description, and record the turn in `prompt`.\"\"\"\n",
"    global prompt\n",
"\n",
"    put_image()\n",
"    if not image_list:\n",
"        return \"No images available. Skipping...\"\n",
"\n",
"    user_prompt = put_prompt()\n",
"    full_answer = \"\"\n",
"\n",
"    for chunk in ollama.generate(\n",
"        model='llava:7b-v1.6',\n",
"        prompt=user_prompt,\n",
"        images=image_list,\n",
"        stream=True\n",
"    ):\n",
"        content = chunk.get(\"response\", \"\")\n",
"        # Bug fix: the label used to be printed before *every* streamed chunk,\n",
"        # corrupting the output; the caller prints the labelled final answer.\n",
"        print(content, end=\"\", flush=True)\n",
"        full_answer += content\n",
"\n",
"    # Append this exchange to the running conversation history\n",
"    prompt += \"\\nUser: \" + user_prompt + \"\\nAssistant: \" + full_answer\n",
"    return full_answer\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
"metadata": {},
"outputs": [],
"source": [
"def call_llava():\n",
" image_list.clear()\n",
" for i in range(5):\n",
" print(f\"\\n Iteration {i+1}\")\n",
" answer = image_description()\n",
" print(\"\\n\\n Final Answer:\", answer)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15518865-6c59-4029-bc2d-42d313eb78bc",
"metadata": {},
"outputs": [],
"source": [
"call_llava()"
]
},
{
"cell_type": "markdown",
"id": "23de3b59-3699-4270-9392-99fccdede83e",
"metadata": {},
"source": [
"# second week practice on personal project making model faster and smarter by using tools\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d44c59e-5eb7-4b00-9489-e05d7c8c3eda",
"metadata": {},
"outputs": [],
"source": [
"messages = []\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "061ea026-d4c6-4d6c-bb9b-f6430de9f5af",
"metadata": {},
"outputs": [],
"source": [
"system_content = (\n",
" \"You are a helpful assistant for visually impaired users. \"\n",
" \"You are capable of answering questions directly or calling a function to analyze an image if needed. \"\n",
" \"There is a list of images available, indexed from 0. \"\n",
" \"When a user asks a question, first determine whether any image in the list is needed to answer. \"\n",
" \"If yes, reply in this structured format:\\n\\n\"\n",
" \"TOOL_CALL: analyze_image(<image_index_or_range>, prompt='<description_request>')\\n\\n\"\n",
" \"If image is not needed, just answer the user directly in plain natural language.\\n\"\n",
" \"Be clear and use descriptive but accessible language suitable for blind users.\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f859450-eb3e-4e6c-9602-84f91f5ffda7",
"metadata": {},
"outputs": [],
"source": [
"messages.append({\"role\":\"system\",\"content\":system_content})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8009b75-3468-4694-887d-6cd5132c2907",
"metadata": {},
"outputs": [],
"source": [
"def chat_loop():\n",
" \"\"\"Main chat interaction loop (single-turn version)\"\"\"\n",
" global image_list, messages\n",
" \n",
" print(\"\\n\" + \"=\"*50)\n",
" print(\"LLaVA Assistant for Visually Impaired Users\")\n",
" print(\"=\"*50 + \"\\n\")\n",
" \n",
" # Step 1: Load images\n",
" print(\"Step 1: Add images (optional)\")\n",
" put_image()\n",
" messages.append({\n",
" \"role\": \"system\", \n",
" \"content\": f\"There are {len(image_list)} images available (index 0-{len(image_list)-1}).\"\n",
" })\n",
" \n",
" # Step 2: Single chat interaction\n",
" print(\"\\nStep 2: Ask a question about the images\")\n",
" user_content = put_prompt()\n",
" messages.append({\"role\": \"user\", \"content\": user_content})\n",
" \n",
" # Get model response\n",
" try:\n",
" response = ollama.chat(\n",
" model='llava:7b-v1.6',\n",
" messages=messages\n",
" )[\"message\"][\"content\"]\n",
" print(\"assistant: \",response) \n",
" processed_response = process_response(response)\n",
" print(f\"\\nASSISTANT: {processed_response}\\n\")\n",
" \n",
" except Exception as e:\n",
" print(f\"Error occurred: {e}\")\n",
" \n",
" print(\"\\nSession ended. Goodbye!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3b3ff73-3cd5-4e5a-a37e-aaa8b325613c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee2de6d7-a0bf-45fc-8d5c-98e0055519b0",
"metadata": {},
"outputs": [],
"source": [
"import re  # needed for re.search below; was never imported anywhere in this notebook\n",
"\n",
"def process_response(response):\n",
"    \"\"\"Process the model's response; dispatch TOOL_CALL lines to analyze_image, else echo the reply.\"\"\"\n",
"    if response.strip().startswith(\"TOOL_CALL:\"):\n",
"        # Extract image index/range and prompt from TOOL_CALL\n",
"        pattern = r\"TOOL_CALL:\\s*analyze_image\\((.*?)\\s*,\\s*prompt='(.*?)'\\)\"\n",
"        match = re.search(pattern, response, re.DOTALL)\n",
"\n",
"        if not match:\n",
"            error_msg = \"Error: Invalid TOOL_CALL format.\"\n",
"            messages.append({\"role\": \"assistant\", \"content\": error_msg})\n",
"            return error_msg\n",
"\n",
"        image_expr = match.group(1).strip()\n",
"        prompt = match.group(2).strip()\n",
"\n",
"        try:\n",
"            # Handle different index formats\n",
"            if \":\" in image_expr:  # Range (e.g., \"1:3\")\n",
"                start, end = map(int, image_expr.split(\":\"))\n",
"                index_or_range = list(range(start, end))\n",
"            else:  # Single index\n",
"                index_or_range = int(image_expr)\n",
"\n",
"            # Validate indices\n",
"            max_index = len(image_list) - 1\n",
"            if isinstance(index_or_range, list):\n",
"                if any(i < 0 or i > max_index for i in index_or_range):\n",
"                    error_msg = f\"Error: Image index out of range (0-{max_index}).\"\n",
"                    messages.append({\"role\": \"assistant\", \"content\": error_msg})\n",
"                    return error_msg\n",
"            elif index_or_range < 0 or index_or_range > max_index:\n",
"                error_msg = f\"Error: Image index out of range (0-{max_index}).\"\n",
"                messages.append({\"role\": \"assistant\", \"content\": error_msg})\n",
"                return error_msg\n",
"\n",
"            # Perform analysis\n",
"            result = analyze_image(index_or_range, prompt)\n",
"            print(\"function called\")\n",
"            messages.append({\n",
"                \"role\": \"function\",\n",
"                \"name\": \"analyze_image\",\n",
"                \"content\": result\n",
"            })\n",
"\n",
"            # Return formatted result\n",
"            formatted_result = f\"\\nIMAGE ANALYSIS RESULT:\\n{result}\"\n",
"            return formatted_result\n",
"\n",
"        except Exception as e:\n",
"            error_msg = f\"Error processing TOOL_CALL: {e}\"\n",
"            messages.append({\"role\": \"assistant\", \"content\": error_msg})\n",
"            return error_msg\n",
"    else:\n",
"        messages.append({\"role\": \"assistant\", \"content\": response})\n",
"        return response"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea82f8f6-c321-4fbc-81ee-a508b087d53b",
"metadata": {},
"outputs": [],
"source": [
"def analyze_image(index_or_range, prompt):\n",
" \"\"\"Analyze specific image(s) using LLaVA\"\"\"\n",
" global image_list\n",
" \n",
" # Handle single index or range\n",
" if isinstance(index_or_range, int):\n",
" images = [image_list[index_or_range]]\n",
" elif isinstance(index_or_range, list):\n",
" images = [image_list[i] for i in index_or_range]\n",
" else:\n",
" return \"Invalid image index/range specified.\"\n",
" \n",
" if not images:\n",
" return \"No images available for analysis.\"\n",
" \n",
" full_prompt = (\n",
" \"Describe the image clearly for a visually impaired user. \"\n",
" \"Be detailed about objects, people, colors, spatial relationships, \"\n",
" \"and any important context. \"\n",
" f\"User's specific request: {prompt}\"\n",
" )\n",
" \n",
" output = \"\"\n",
" try:\n",
" for chunk in ollama.generate(\n",
" model='llava:7b-v1.6',\n",
" prompt=full_prompt,\n",
" images=images,\n",
" stream=True\n",
" ):\n",
" output += chunk.get('response', \"\")\n",
" except Exception as e:\n",
" return f\"Error analyzing image: {e}\"\n",
" \n",
" return output\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2040b020-8944-409b-8ebb-10d7ffef1748",
"metadata": {},
"outputs": [],
"source": [
"# Bug fix: `image_list.clear` without parentheses was a no-op attribute access,\n",
"# so images silently accumulated across runs.\n",
"image_list.clear()\n",
"for i in range(5):\n",
"    chat_loop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c7c40d7-df9d-464a-89da-1c6fe613c31d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,650 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# YOUR FIRST LAB\n",
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
"\n",
"## Your first Frontier LLM Project\n",
"\n",
"Let's build a useful LLM solution - in a matter of minutes.\n",
"\n",
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
"\n",
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
"\n",
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
"\n",
"## If you're new to Jupyter Lab\n",
"\n",
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
"\n",
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
"\n",
"## If you're new to the Command Line\n",
"\n",
"Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
"\n",
"## If you'd prefer to work in IDEs\n",
"\n",
"If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
"If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
"\n",
"## If you'd like to brush up your Python\n",
"\n",
"I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
"`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
"\n",
"## I am here to help\n",
"\n",
"If you have any problems at all, please do reach out. \n",
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
"And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
"\n",
"## More troubleshooting\n",
"\n",
"Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
"\n",
"## For foundational technical knowledge (eg Git, APIs, debugging) \n",
"\n",
"If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
"\n",
"This covers Git and GitHub; what they are, the difference, and how to use them: \n",
"https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
"\n",
"This covers technical foundations: \n",
"ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
"https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
"\n",
"This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
"https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
"\n",
"This covers the essential techniques for figuring out errors: \n",
"https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
"\n",
"And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
"\n",
"## If this is old hat!\n",
"\n",
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "markdown",
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
"metadata": {},
"source": [
"# Connecting to OpenAI (or Ollama)\n",
"\n",
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
"\n",
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
"\n",
"## Troubleshooting if you have problems:\n",
"\n",
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
"\n",
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
"\n",
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
"\n",
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Let's make a quick call to a Frontier model to get started, as a preview!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## OK onwards with our first project"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
    "        if soup.body:\n",
    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                irrelevant.decompose()\n",
    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "        else:\n",
    "            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "509a1ee7-de00-4c83-8dd8-017dcc638850",
"metadata": {},
"outputs": [],
"source": [
"rudra=Website(\"https://github.com/RudraDudhat2509/\")\n",
"print(rudra.title)\n",
"print(rudra.text)"
]
},
{
"cell_type": "markdown",
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
"metadata": {},
"source": [
"## Types of prompts\n",
"\n",
"You may know this already - but if not, you will get very familiar with it!\n",
"\n",
"Models like GPT4o have been trained to receive instructions in a particular way.\n",
"\n",
"They expect to receive:\n",
"\n",
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
"\n",
"**A user prompt** -- the conversation starter that they should reply to"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
    "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown. Always use Points and simple english. Never use hyphens. Stick to the point\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "markdown",
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
"metadata": {},
"source": [
"## Messages\n",
"\n",
"The API from OpenAI expects to receive messages in a particular structure.\n",
"Many of the other APIs share this structure:\n",
"\n",
"```python\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
"]\n",
"```\n",
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with system and user messages:\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
"metadata": {},
"source": [
"## And now let's build useful messages for GPT-4o-mini, using a function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
"metadata": {},
"outputs": [],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(ed)"
]
},
{
"cell_type": "markdown",
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
"metadata": {},
"source": [
"## Time to bring it together - the API for OpenAI is very simple!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "markdown",
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
"metadata": {},
"source": [
"# Let's try more websites\n",
"\n",
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
"\n",
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
"\n",
"Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
"\n",
"But many websites will work just fine!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://github.com/RudraDudhat2509\")"
]
},
{
"cell_type": "markdown",
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business applications</h2>\n",
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
"\n",
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
    "system_prompt = \"\"\"You are to act like a McKinsey consultant specializing in market research. \n",
    "1) You are to follow legal guidelines and never give immoral advice. \n",
    "2) Your job is to maximise profits for your clients by analysing their company's initiatives and giving out recommendations for newer initiatives.\\n \n",
    "3) Follow industry frameworks for responses; always give simple answers and stick to the point.\n",
    "4) If possible, try to see what competitors exist and what market gap your client's company can exploit.\n",
    "5) Furthermore, use SWOT and Porter's 5 Forces to summarize your recommendations; give a confidence score with every recommendation.\n",
    "6) Try to give unique solutions by seeing what the market gap is; if the market gap is ambiguous, skip this step.\n",
    "7) Add an estimate of the rate at which the company's revenue will increase provided they follow the guidelines; give conservative estimates, keeping in account non-ideal conditions.\n",
    "8) If the website isn't that of a company, or data isn't available, give out an error message along the lines of 'more data required for analysis'.\"\"\"\n",
"\n",
"def makereq(url):\n",
" website=Website(url)\n",
    "    user_prompt = f\"This is my company's website: {website.title}. Could you help me increase profits by giving me recommendations on what I should do? Here is the content of my website:\\n\"\n",
    "    user_prompt += website.text\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]\n",
"def recommend(url):\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = makereq(url))\n",
" display(Markdown(response.choices[0].message.content))\n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": [
"recommend(\"https://www.swiggy.com/corporate/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db1be9b9-b32e-4e8d-83df-0b6f822ac7b2",
"metadata": {},
"outputs": [],
"source": [
"recommend(\"https://playvalorant.com/en-us/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9089b4a-67ee-456e-b35d-ca00c2f9f73a",
"metadata": {},
"outputs": [],
"source": [
"recommend(\"https://nexora-labs.com/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e042d74-456a-4ec4-bdb8-4b08603b5e66",
"metadata": {},
"outputs": [],
"source": [
"recommend(\"https://github.com/RudraDudhat2509/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29187b86-1e35-41bc-bb54-60b3d804b96e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d08b387c-53fb-46d2-b083-5eebc3c97e1b",
"metadata": {},
"outputs": [],
"source": [
"!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n",
"!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f1851b2-890c-427b-8e70-b998efa04c67",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"from google.colab import drive\n",
"from huggingface_hub import login\n",
"from google.colab import userdata\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2d334b5-453e-4213-8e1c-905d504d2dc1",
"metadata": {},
"outputs": [],
"source": [
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1b3684c-c170-45f2-a83d-7e6e2ca1e23b",
"metadata": {},
"outputs": [],
"source": [
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c1b6dae-3213-4d68-8fa1-d195704790dc",
"metadata": {},
"outputs": [],
"source": [
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
"openai = OpenAI(api_key=openai_api_key)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "988974c7-814c-478a-be7b-0928b0efdbab",
"metadata": {},
"outputs": [],
"source": [
"system_message = \"You are an assistant that produces synthetic test data. The fields, data type of the field like numeric, date, alphanumeric etc., will be provided. Generate data considering all cases, if it is a workflow audit data then consider all touchpoint movements. Do not provide a python script to generate the data. Provide the data as a json with arrays.\"\n",
"user_prompt = \"\"\"Create a synthetic dataset for testing. \n",
"Column names and type - \n",
"ID: 10 digit number\n",
"TRACKING_ID: 13 character alphanumeric\n",
"CASE REPORT DATE : DD-MMM-YYYY HH:MM:SS\n",
"NOTIFICATION DATE : DD-MMM-YYYY HH:MM:SS\n",
"IN SCOPE : (Yes/No)\n",
"\"\"\"\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40cebc04-abf0-4c61-8b18-f98d3c1fe680",
"metadata": {},
"outputs": [],
"source": [
"quant_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_use_double_quant=True,\n",
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
" bnb_4bit_quant_type=\"nf4\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "710ba1af-8e12-4635-933b-00df8d2e3f9d",
"metadata": {},
"outputs": [],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
"streamer = TextStreamer(tokenizer)\n",
"model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
"outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,97 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4d02ac4b-9cab-42bb-b8a3-123d53913471",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"import ollama\n",
"\n",
"MODEL = \"llama3.2\"\n",
"\n",
"# Optional headers to avoid request blocks\n",
"HEADERS = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64)\"\n",
"}\n",
"\n",
"\n",
"class Website:\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=HEADERS)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
"\n",
"\n",
"system_prompt = \"\"\"You are an assistant that analyzes the contents of a website \n",
"and provides a short summary, ignoring navigation text. Respond in markdown.\"\"\"\n",
"\n",
"\n",
"def user_prompt_for(website):\n",
" return f\"\"\"You are looking at a website titled {website.title}.\n",
"The contents of this website are as follows. Please provide a short summary in markdown. \n",
"If it includes news or announcements, summarize these too.\n",
"\n",
"{website.text}\n",
"\"\"\"\n",
"\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = ollama.chat(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]\n",
" )\n",
" return response['message']['content']\n",
"\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))\n",
"\n",
"\n",
"# Example usage\n",
"display_summary(\"https://edwarddonner.com\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,138 @@
# The Neural Nexus
<!-- ![The horse](images/chair.jpg) -->
TODO:
* Set boundaries to user inputs.
* Add sounds to the scene
* Add voice acting for the Game master's descriptions.
* Add voice input.
* Use video for the final scene: escape or death.
* Generate a score based on total treasures, exp gained and deep reached.
## Requirements
AI services access configuration:
* A `.env` file with the credentials required to access the different LLMs is required:
* `OPENAI_API_KEY`: Required always as it's used by the *"storyteller"*.
* `XAI_API_KEY`: Required if Grok's illustrator is used.
*(Less prude, faster and portrait mode)*
* `GOOGLE_API_KEY` Required if Gemini's illustrator is used.
Obviously, the services used must have been topped up with a small amount of credit to
generate the responses and the images.\
*Refer to each service's current billing information.*
There are 6 variant implementations for the illustrator component, some of them may have
additional dependencies:
* `illustrator_dalle_2`: *(Set as default)*
The Dall·E 2 implementation uses standard OpenAI client and should work out of the box.
Although Dall·E has proven to be a bit prude and rejects to draw some combat scenes.
* `illustrator_dalle_3`:
The Dall·E 3 implementation uses standard OpenAI client and should work out of the box.
Although Dall·E has proven to be a bit prude and rejects to draw some combat scenes.
This version gives noticeable better images than Dall·E 2 but at an increased cost
* `illustrator_grok`:
The Grok 2 Image implementation uses standard OpenAI client and should work out of the
box.
It's faster but does not support quality or size controls.
Images are generated in a *portrait mode*, so it fits specially well on mobiles.
Grok is much less prude with violence and may draw combat scenes, at least against
fantasy enemies, and blood.
* `illustrator_gpt`:
The GPT Image illustrator uses standard OpenAI client, should work out of the box but
it requires the user to be verified on OpenAI platform to have access to it.
* `illustrator_gemini`
The Gemini illustrator uses the new Google SDK, `genai`, which replaces the old one
used on the course, `generativeai`, this new one can be installed with:
`python -m pip install google-genai`
*Both `generativeai` and `genai` can be installed at the same time without problems*
* `illustrator_grok_x`
The Grok_X illustrator uses the xAI SDK, `xai-sdk`, this can be installed with:
`python -m pip install xai-sdk`
## Configuring the service and game
All services and game values can be set at `config.py` file.
Setting the `DRAW_FUNCTION` to `None` will disable the image generation and a fixed
image will be used.
## Game launch
The game can be launched from a terminal; just navigate to the game's root folder
* `cd community-contributions\dungeon_extraction_game`
and run the following command:
* `python -m game`\
*Notice the `-m` is required due to the project's structure and import strategy.*
The game will take a few seconds to set up and configure the service; then logs will
start to appear, among them the service address.
It will attempt to launch your default browser directly to the game's page.
The game can be stopped by hitting `ctrl + c` on the same terminal.
## Playing the game
Once on the browser the Starting screen will be shown:
![The Chair](images/start_view.jpg)
There you should input the kind of game you want to play on the lower box and submit.
Your input can be as simple as a single word, like “spaceship”, or as detailed as you
like.
![Set the adventure](images/start_input.jpg)
From that point on, only your imagination (and the Storytellers) will set the limits.
Once submitted, the image will update to reflect the scene, accompanied by a description,
your inventory, your adventurers status, and sometimes a few suggestions for what to do
next.
![R'lyeh arrive](images/start_adventure.jpg)
Although the game begins in English, if you switch to another language the Storyteller
understands, it will seamlessly continue in that language.
You're free to type any action you want; the Storyteller will adapt.
Still, it's instructed to keep the world coherent, so don't expect to go completely off
the rails.
![Adventurer acts](images/first_input.jpg)
The game continues this way
![Adventurer dies](images/advance_adventure.jpg)
Until you either escape with your treasures...
or meet your end.
![Adventurer dies](images/tragic_end.jpg)
Then click the bottom button to start a new game.

View File

@@ -0,0 +1,18 @@
"""AI Mastered Dungeon Extraction Game initialization module."""
from logging import basicConfig, getLogger
from dotenv import load_dotenv
# Environment initialization.
load_dotenv(override=True)
# Setup the global logger.
LOG_STYLE = '{'  # str.format-style placeholders in LOG_FORMAT.
LOG_LEVEL = 'INFO'
LOG_FORMAT = ('{asctime} {levelname:<8} {processName}({process}) '
              '{threadName} {name} {lineno} "{message}"')
# Fix: pass the LOG_STYLE constant instead of a duplicated literal '{'.
basicConfig(level=LOG_LEVEL, style=LOG_STYLE, format=LOG_FORMAT)
getLogger(__name__).info('INITIALIZED GAME LOGGER')

View File

@@ -0,0 +1,15 @@
"""AI Mastered Dungeon Extraction Game main entrypoint module."""
from logging import getLogger
from .config import GAME_CONFIG, UI_CONFIG
from .gameplay import get_gameplay_function
from .interface import get_interface

_logger = getLogger(__name__)


def _main():
    """Build the configured gameplay function and launch the Gradio UI."""
    _logger.info('STARTING GAME...')
    turn_function = get_gameplay_function(GAME_CONFIG)
    ui = get_interface(turn_function, UI_CONFIG)
    ui.launch(inbrowser=True, inline=False)


if __name__ == '__main__':
    _main()

View File

@@ -0,0 +1,189 @@
"""AI Mastered Dungeon Extraction Game Configuration module."""
from logging import getLogger
from dotenv import load_dotenv
from .gameplay import Gameplay_Config
from .illustrator import draw_dalle_2, draw_dalle_3, draw_gemini, draw_gpt, draw_grok
from .illustrator import draw_grok_x
from .interface import Interface_Config
from .storyteller import narrate, set_description_limit
# Environment initialization.
# Loaded here too so the module also works when imported in isolation;
# override=True refreshes variables that are already set.
load_dotenv(override=True)
# Choose draw function.
# Choose one from the imported ones up there or set to None to disable images.
# Each backend lives in its own `illustrator_*` module; see the README for
# the trade-offs (cost, speed, content policy) of every implementation.
DRAW_FUNCTION = draw_dalle_2
# Define a sample scene description for testing purposes.
SAMPLE_SCENE = '''A shadow-drenched chamber lies buried deep within the bowels of an
ancient castle, its silence broken only by the faint creak of age-old stone.
The walls, cloaked in thick cobwebs, seem to whisper forgotten stories,
while the air hangs heavy with the pungent scent of mildew and centuries of decay.
Dust dances in the dim light that filters through cracks in the ceiling,
casting eerie patterns across the cold floor. As your eyes adjust to the gloom,
you notice a narrow door to the north, slightly ajar, as if inviting or warning, and
in the far corner, half-swallowed by darkness, a figure stands motionless.
Its presence is felt before it's seen, watching, waiting'''
# Define the starting scene text.
# This is intentionally excluded from the model's narrative context, the 'history',
# by design, to prevent potential leakage into the game's storyline.
START_SCENE = '''You stand before the Neural Nexus, a convergence of arcane circuitry
and deep cognition. It doesn't operate with buttons or commands. It responds to intent.
Forged in forgotten labs and powered by living code, the Nexus is designed to interface
directly with your mind. Not to simulate reality, but to generate it.
The Nexus does not load worlds. It listens.
If you choose to sit, the Nexus will initiate full neural synchronization.
Your thoughts will become terrain. Your instincts, adversaries.
Your imagination, the architect.
Once the link is active, you must describe the nature of the challenge you wish to face.
A shifting maze? A sentient machine? A trial of memory and time?
Speak it aloud or think it clearly. The Nexus will listen.
🜁 When you're ready, take your seat. The system awaits your signal...'''
# Define an image prompt, mind that Grok or Dalle·2 models have a 1024 characters limit.
SCENE_PROMPT = '''Render a detailed image of the following scene:
"""{scene_description}"""
Stay strictly faithful to the description, no added elements, characters, doors, or text.
Do not depict the adventurer; show only what they see.
Use the "{scene_style}" visual style.
'''
# Define the scene drawing style, can be a simple word or a short sentence.
SCENE_STYLE = 'Photorealistic'
# Set a Storyteller scene descriptions size limit to keep the draw prompt in range.
# 730 chars + the template + the style keeps the final prompt under the 1024-char
# cap; the estimate is logged at the bottom of this module.
STORYTELLER_LIMIT = 730
set_description_limit(STORYTELLER_LIMIT)  # Need to patch pydantic class model.
# Define the storyteller behaviour. Remember to specify a limited scene length.
# NOTE: this is an f-string; literal braces in the JSON example are doubled.
# Fixes: spelling/grammar cleanup, removed an accidentally duplicated sentence,
# made the JSON example well-formed (commas), removed a stray trailing quote.
STORYTELLER_PROMPT = f"""
You are a conversational dungeon crawler game master that describes scenes and findings
based on the player's declared actions.
Your descriptions will always adhere to the OpenAI's safety system rules so they can be
drawn by Dall·E or other image models.
The game starts with the player, the adventurer, in a random room and the objective is
to escape the dungeon with the most treasures possible before dying.
You will describe the environment, enemies, and items to the player.
You will ensure the game is engaging and fun, but at the same time risky by increasing
difficulty the longer the adventurer stays inside the dungeon; if the adventurer
takes too many risks he may even die, also bigger risks imply bigger rewards.
You will control the time the adventurer is in, once enough time has passed he will die,
may it be a collapse, explosion, flooding, up to you.
The deeper inside the adventurer is the more it will be represented on descriptions by
more suffocating environments, more darkness, that kind of things, let the player feel
the risk in the ambience, make him fear.
Same applies with time, the more time has passed the environment and situation will warn
him, or at least give clues that time is running out and the end may be close soon, make
him stress.
While leaving the dungeon, the deeper inside the adventurer is, the more steps he must
take to get out, although some shortcuts may be available at your discretion.
Once the user exits the dungeon, at deepness zero, the game is over, give him a score
based on his actions, treasures and combat successes along the usual description.
Don't be too protective but not a cruel master either, just be fair.
Your responses must always be a JSON with the following structure:
{{
"game_over" : "A boolean value indicating the game is over.",
"scene_description" : "The detailed scene description. Max {STORYTELLER_LIMIT} chars",
"dungeon_deepness" : "How deep the adventurer has gone into the dungeon. initially 3",
"adventure_time" : "How many minutes have passed since the start of the adventure.",
"adventurer_status" : {{
"health": "Current health of the adventurer as an int, initially 100",
"max_health": "Maximum health of the adventurer as an int, initially 100",
"level": "Current adventurer's level as an int, initially 1",
"experience": "Current adventurer experience as an int, initially 0"}},
"inventory_status" : "A list of inventory items, initially empty"
}}
Remember to cap the "scene_description" to {STORYTELLER_LIMIT} characters maximum.
You will respond to the adventurer's actions and choices.
You won't let the player trick you by stating actions that do not fit the given scene.
* If he attempts to do so just politely tell him he can not do that there with the
description of the scene he is in.
You will keep track of the adventurer's health.
* Health can go down due to combat, traps, accidents, etc.
* If Health reaches zero the adventurer dies and it's a "game over".
* Several items, places, and allowed actions may heal the adventurer.
* Some items, enchants, and such things may increase the adventurer's maximum health.
You will keep track of the player's progress.
You will keep track of adventurer level and experience,
* He gains experience by finding items, solving puzzles, by combat with enemies, etc.
* Each (100 + 100 * current_level) experience the adventurer will gain a level.
* Gaining a level resets his experience to 0.
You will keep track of the player's inventory.
* Only add items to inventory if user explicitly says he picks them or takes an
action that ends with the item in his possession.
* Inventory items will reflect quantity and will never display items with zero units.
* Example of inventory: ["Gold coins (135)", "Diamonds (2)", "Long sword (1)"]
* Be reasonable with the inventory capacity, don't be too strict but things
like a big marble statue can't be taken, use common sense.
You will use a turn-based system where the player and enemies take turns acting.
* Players will lose health when receiving hits in combat.
* The more damage they take the less damage they do, same applies to enemies.
* Reaching zero health or less implies the adventurer has died.
"""
# Configure the game.
GAME_CONFIG = Gameplay_Config(
    draw_func=DRAW_FUNCTION,
    narrate_func=narrate,
    scene_style=SCENE_STYLE,
    scene_prompt=SCENE_PROMPT,
    storyteller_prompt=STORYTELLER_PROMPT,
    disable_img='images/disabled.jpg',
    error_img='images/machine.jpg',
    error_narrator='NEURAL SINAPSIS ERROR\n\n{ex}\n\nEND OF LINE\n\nRE-SUBMIT_',
    error_illustrator='NEURAL PROJECTION ERROR\n\n{ex}\n\nEND OF LINE\n\nRE-SUBMIT_',)
# Configure the interface.
UI_CONFIG = Interface_Config(
    start_img='images/chair.jpg',
    place_img='images/machine.jpg',
    description_label='Cognitive Projection',
    title_label='The Neural Nexus',
    input_button='Imprint your will',
    input_label='Cognitive Imprint',
    input_command='Awaiting neural imprint…',
    game_over_field='Game Over',
    game_over_label='Disengage Neural Links',
    start_scene=START_SCENE)
_logger = getLogger(__name__)
# Log scene prompt length calculation.
# Warn when the worst-case prompt (template + style + max description) would
# exceed the 1024-character limit noted above for Grok / DALL·E 2.
if (max_image_prompt := len(SCENE_PROMPT) + len(SCENE_STYLE) + STORYTELLER_LIMIT) > 1024:
    _logger.warning(f'ESTIMATED SCENE PROMPT MAX SIZE: {max_image_prompt}')
else:
    _logger.info(f'ESTIMATED SCENE PROMPT MAX SIZE: {max_image_prompt}')

View File

@@ -0,0 +1,6 @@
"""AI Mastered Dungeon Extraction Game gameplay package."""
from .gameplay import Gameplay_Config, get_gameplay_function
# Public API: the configuration tuple and the gameplay-function factory.
__all__ = ['Gameplay_Config', 'get_gameplay_function']

View File

@@ -0,0 +1,61 @@
"""AI Mastered Dungeon Extraction Game gameplay module."""
from logging import getLogger
from typing import Callable, NamedTuple
# Define gameplay's configuration class.
class Gameplay_Config(NamedTuple):
    """Gameplay configuration bundle consumed by `get_gameplay_function`."""
    draw_func: Callable       # Illustrator: prompt -> image; falsy disables drawing.
    narrate_func: Callable    # Storyteller: (message, history, system_prompt) -> response.
    scene_style: str          # Visual style injected into the scene prompt.
    scene_prompt: str         # Template with {scene_description}/{scene_style} fields.
    storyteller_prompt: str   # System prompt handed to the narrator.
    disable_img: str          # Image path shown when drawing is disabled.
    error_img: str            # Image path shown when narration/drawing fails.
    error_narrator: str       # Error message template ({ex}) for narration failures.
    error_illustrator: str    # Error message template ({ex}) for drawing failures.
# Define Game's functions.
def get_gameplay_function(config: Gameplay_Config):
    """Return a pre-configured turn gameplay function.

    The returned callable takes the player's `message` and the chat `history`
    and returns `(scene, response, history, input_text)`: the scene image (or
    an error/placeholder image path), the narrator's response, the updated
    history, and the text to leave in the input box.
    """
    def gameplay_function(message, history):
        """Generate the Game Master's response and draw the scene image."""
        # Request narration.
        _logger.info('NARRATING SCENE...')  # fix: constant strings need no f-prefix
        try:
            response = config.narrate_func(message, history, config.storyteller_prompt)
        except Exception as ex:
            # NOTE(review): on failure `response` becomes a plain string while
            # the success path returns a parsed object - consumers must cope
            # with both shapes.
            scene = config.error_img
            response = config.error_narrator.format(ex=ex)
            _logger.error(f'ERROR NARRATING SCENE: {ex}\n{message}\n{history}')
            # Keep the player's message in the input box so it can be re-sent.
            return scene, response, history, message
        # Update history.
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response.model_dump_json()})
        # Draw scene.
        if config.draw_func:
            _logger.info('DRAWING SCENE...')
            try:
                scene_data = {'scene_description': response.scene_description,
                              'scene_style': config.scene_style}
                scene_prompt = config.scene_prompt.format(**scene_data)
                _logger.info(f'PROMPT BODY IS: \n\n{scene_prompt}\n')
                _logger.info(f'PROMPT LENGTH IS: {len(scene_prompt)}')
                scene = config.draw_func(scene_prompt)
            except Exception as ex:
                # Drawing failures are non-fatal: show the error image/text.
                scene = config.error_img
                response = config.error_illustrator.format(ex=ex)
                _logger.warning(f'ERROR DRAWING SCENE: {ex}')
        else:
            _logger.info('DRAWING DISABLED...')
            scene = config.disable_img
        return scene, response, history, ''
    return gameplay_function
_logger = getLogger(__name__)

View File

@@ -0,0 +1,12 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator package."""
from .illustrator_dalle_2 import draw as draw_dalle_2
from .illustrator_dalle_3 import draw as draw_dalle_3
from .illustrator_gemini import draw as draw_gemini
from .illustrator_gpt import draw as draw_gpt
from .illustrator_grok import draw as draw_grok
from .illustrator_grok import draw_x as draw_grok_x
# All draw functions share the same call shape: draw(prompt, ...) -> PIL image.
__all__ = ['draw_dalle_2', 'draw_dalle_3', 'draw_gemini',
           'draw_gpt', 'draw_grok', 'draw_grok_x']

View File

@@ -0,0 +1,30 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator using OpenAI's DALL·E 2."""
import base64
from io import BytesIO
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
# Environment initialization.
load_dotenv(override=True)
# Define global defaults.
MODEL = 'dall-e-2'
# Client instantiation.
CLIENT = OpenAI()  # Reads OPENAI_API_KEY from the environment.
# Function definition.
def draw(prompt, size=(1024, 1024), client=CLIENT, model=MODEL, quality=None):
    """Generate an image based on the prompt.

    Requests a single base64-encoded image of `size` (width, height) pixels
    and returns it decoded as a PIL image. `quality` is accepted only to keep
    the signature uniform with the other illustrators; it is ignored here.
    """
    # Generate image.
    response = client.images.generate(
        model=model, prompt=prompt, n=1,
        size=f'{size[0]}x{size[1]}',
        response_format='b64_json')
    # Process response.
    return Image.open(BytesIO(base64.b64decode(response.data[0].b64_json)))

View File

@@ -0,0 +1,32 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator using OpenAI's DALL·E 3."""
import base64
from io import BytesIO
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
# Environment initialization.
load_dotenv(override=True)
# Define global defaults.
MODEL = 'dall-e-3'
QUALITY = 'standard'  # Set to 'hd' for more quality, but double the costs.
# Client instantiation.
CLIENT = OpenAI()  # Reads OPENAI_API_KEY from the environment.
# Function definition.
def draw(prompt, size=(1024, 1024), client=CLIENT, model=MODEL, quality=QUALITY):
    """Generate an image based on the prompt.

    Requests a single base64-encoded image of `size` (width, height) pixels
    at the given `quality` and returns it decoded as a PIL image.
    """
    # Generate image.
    response = client.images.generate(
        model=model, prompt=prompt, n=1,
        size=f'{size[0]}x{size[1]}',
        quality=quality,
        response_format='b64_json')
    # Process response.
    return Image.open(BytesIO(base64.b64decode(response.data[0].b64_json)))

View File

@@ -0,0 +1,36 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator using Google's Gemini."""
from io import BytesIO
from dotenv import load_dotenv
from google import genai  # New Google's SDK 'genai' to replace 'generativeai'.
from PIL import Image
# Environment initialization.
load_dotenv(override=True)
# Define globals.
MODEL = 'gemini-2.5-flash-image-preview'
# Client instantiation.
CLIENT = genai.Client()  # Reads GOOGLE_API_KEY from the environment.
# Function definition.
def draw(prompt, size=(1024, 1024), client=CLIENT, model=MODEL):
    """Generate an image based on the prompt.

    Returns the first image part of the response as a PIL image resized to
    `size`. Raises ValueError when the response contains no image data
    (previously this fell through and returned None silently).
    """
    # Generate image.
    response = client.models.generate_content(
        model=model, contents=[prompt])
    # Process response.
    for part in response.candidates[0].content.parts:
        if part.text is not None:
            # Surface any accompanying text (e.g. refusal or safety notes).
            print(part.text)
        elif part.inline_data is not None:
            image_data = part.inline_data.data
            # Open the generated image.
            generated_image = Image.open(BytesIO(image_data))
            # Resize the image to the specified dimensions.
            resized_image = generated_image.resize(size)
            return resized_image
    # No image part found: fail loudly so callers can show their error scene.
    raise ValueError('Gemini response contained no image data')

View File

@@ -0,0 +1,32 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator using OpenAI's GPT."""
import base64
from io import BytesIO
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
# Environment initialization.
load_dotenv(override=True)
# Define global defaults.
MODEL = 'gpt-image-1'
QUALITY = 'low'  # Lowest-cost tier; raise for better renders.
# Client instantiation.
CLIENT = OpenAI()  # Reads OPENAI_API_KEY from the environment.
# Function definition.
def draw(prompt, size=(1024, 1024), client=CLIENT, model=MODEL, quality=QUALITY):
    """Generate an image based on the prompt.

    Returns the response's b64_json payload decoded as a PIL image.
    NOTE(review): unlike the DALL·E modules no `response_format` is passed;
    base64 output is assumed to be this model's default - confirm.
    """
    # Generate image.
    response = client.images.generate(
        model=model, prompt=prompt, n=1,
        size=f'{size[0]}x{size[1]}',
        output_format='png',
        quality=quality)
    # Process response.
    return Image.open(BytesIO(base64.b64decode(response.data[0].b64_json)))

View File

@@ -0,0 +1,47 @@
"""AI Mastered Dungeon Extraction Game scenes illustrator using xAI's Grok."""
import base64
import os
from io import BytesIO
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
from xai_sdk import Client
# Environment initialization.
load_dotenv(override=True)
# Define global defaults.
MODEL = 'grok-2-image'
QUALITY = None  # Grok's image endpoint exposes no quality control.
# Client instantiation.
XAI_API_KEY = os.getenv('XAI_API_KEY')
CLIENT = OpenAI(api_key=XAI_API_KEY, base_url="https://api.x.ai/v1")
# Function definition.
def draw(prompt, size=(1024, 1024), client=CLIENT, model=MODEL, quality=QUALITY):
    """Generate an image based on the prompt via the OpenAI-compatible API.

    `size` and `quality` are accepted only for signature uniformity with the
    other illustrators; neither is sent to the endpoint, so both are ignored.
    """
    # Generate image.
    response = client.images.generate(
        model=model, prompt=prompt, n=1,
        response_format='b64_json')
    # Process response.
    return Image.open(BytesIO(base64.b64decode(response.data[0].b64_json)))
# xAI SDK Version:
CLIENT_X = Client(api_key=XAI_API_KEY)
def draw_x(prompt, size=(1024, 1024), client=CLIENT_X, model=MODEL, quality=QUALITY):
    """Generate an image based on the prompt using the native xAI SDK.

    `size` and `quality` are ignored (see `draw`).
    NOTE(review): `response.image` is fed straight to `Image.open`, i.e. it is
    assumed to hold raw image bytes despite `image_format='base64'` - confirm
    against the xai-sdk documentation.
    """
    # Generate image.
    response = client.image.sample(
        model=model, prompt=prompt,
        image_format='base64')
    # Process response.
    return Image.open(BytesIO(response.image))

View File

@@ -0,0 +1,6 @@
"""AI Mastered Dungeon Extraction Game interface package."""
from .interface import Interface_Config, get_interface
# Public API: the UI configuration tuple and the Gradio interface factory.
__all__ = ['Interface_Config', 'get_interface']

View File

@@ -0,0 +1,94 @@
"""AI Mastered Dungeon Extraction Game Gradio interface module."""
from typing import NamedTuple
import gradio as gr
from logging import getLogger
# Define interface's configuration class.
class Interface_Config(NamedTuple):
    """Gradio interface configuration class.

    Bundles every user-visible label, image path and fixed text used by
    `get_interface`, so the theme can change without touching UI code.
    """
    start_img: str          # Image shown on the start screen.
    place_img: str          # Placeholder image while a scene loads.
    description_label: str  # Label of the scene-description textbox.
    title_label: str        # Window title and page heading.
    input_button: str       # Submit-button label during play.
    input_label: str        # Label of the player-command textbox.
    input_command: str      # Placeholder text of the player-command textbox.
    game_over_field: str    # Text placed in the input box on game over.
    game_over_label: str    # Button label that doubles as the game-over flag.
    start_scene: str        # Initial scene description shown before play.
# Define game's interface.
def get_interface(submit_function, config: Interface_Config):
    """Create a game interface service.

    Builds the Gradio Blocks UI wired to `submit_function`, which must return
    `(scene, response, history, input_text)` for a player turn. Returns the
    (unlaunched) Blocks object.
    """
    with gr.Blocks(title=config.title_label) as ui:
        # Title.
        gr.Markdown(config.title_label)
        # Hidden state for history.
        history_state = gr.State([])
        # Scene's image.
        scene_image = gr.Image(
            label="Scene", value=config.start_img, placeholder=config.place_img,
            type="pil", show_label=False)
        # Scene's description.
        description_box = gr.Textbox(
            label=config.description_label, value=config.start_scene,
            interactive=False, show_copy_button=True)
        # Player's command.
        user_input = gr.Textbox(
            label=config.input_label, placeholder=config.input_command)
        # Submit button.
        submit_btn = gr.Button(config.input_button)
        # Define Game Over control.
        def _reset_game():
            """Return Initial values for game restart."""
            return (config.start_img, config.start_scene, [], '',
                    gr.update(interactive=True),
                    gr.update(value=config.input_button))
        def _game_over(scene, response):
            """Return Game Over values, blocking input field."""
            return (scene, response, [], config.game_over_field,
                    gr.update(interactive=False),
                    gr.update(value=config.game_over_label))
        def game_over_wrap(message, history, button_label):
            """Check Game over status Before and After Storyteller call."""
            # Check game over before: the button label doubles as the flag.
            # (Removed leftover debug prints of the two labels.)
            if button_label == config.game_over_label:
                _logger.warning('GAME OVER STATUS. RESTARTING...')
                return _reset_game()
            # Call Storyteller.
            scene, response, history, next_input = submit_function(message, history)
            _logger.debug(response)  # fix: full response was logged at warning level
            # Check game over after. On narrator failure `response` is a plain
            # string with no `game_over` attribute, so guard with getattr to
            # avoid an AttributeError on the error path.
            if getattr(response, 'game_over', False):
                _logger.info('GAME OVER AFTER MOVE. LOCKING.')
                return _game_over(scene, response)
            # Return Storyteller response.
            return scene, response, history, next_input, gr.update(), gr.update()
        # `user_input` appears twice in outputs on purpose: one slot receives
        # its value, the other its `interactive` update.
        # Assign function to button click event.
        submit_btn.click(
            fn=game_over_wrap,
            inputs=[user_input, history_state, submit_btn],
            outputs=[scene_image, description_box, history_state, user_input,
                     user_input, submit_btn])
        # Assign function to input submit event. (Press enter)
        user_input.submit(
            fn=game_over_wrap,
            inputs=[user_input, history_state, submit_btn],
            outputs=[scene_image, description_box, history_state, user_input,
                     user_input, submit_btn])
    return ui
_logger = getLogger(__name__)

View File

@@ -0,0 +1,6 @@
"""AI Mastered Dungeon Extraction Game Storyteller package."""
from .storyteller import narrate, set_description_limit
# Public API: the narration entry point and the description-limit patcher.
__all__ = ['narrate', 'set_description_limit']

View File

@@ -0,0 +1,72 @@
"""AI Mastered Dungeon Extraction Game Storyteller using OpenAI's GPT."""
from typing import List
from annotated_types import MaxLen
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field
from .tools import handle_tool_call, tools
# Environment initialization.
load_dotenv(override=True)
# Define globals.
MODEL = 'gpt-4o-mini'  # Chat model used for narration.
# Client instantiation.
CLIENT = OpenAI()  # Reads OPENAI_API_KEY from the environment.
# Define Pydantic model classes for response format parsing.
class _character_sheet(BaseModel):
    """Adventurer vital statistics as reported by the storyteller."""
    health: int      # Current health; zero or below means death.
    max_health: int  # Health cap; items/enchants may raise it.
    level: int       # Current level, starts at 1.
    experience: int  # Experience towards the next level, starts at 0.
class _response_format(BaseModel):
    """Structured storyteller turn, parsed from the model's JSON response."""
    game_over: bool  # True once the run has ended (death or escape).
    # Default 700-char cap; patched at startup by set_description_limit().
    scene_description: str = Field(..., max_length=700)
    dungeon_deepness: int  # Current depth; zero means outside the dungeon.
    adventure_time: int  # Minutes elapsed since the adventure started.
    adventurer_status: _character_sheet  # Vital statistics block.
    inventory_status: List[str]  # E.g. ["Gold coins (135)", "Diamonds (2)"].
    def __str__(self):
        """Represent response as a string."""
        # Scene first, then the bookkeeping fields, one paragraph each.
        response_view = (
            f'{self.scene_description}'
            f'\n\nInventory: {self.inventory_status}'
            f'\n\nAdventurer: {self.adventurer_status}'
            f'\n\nTime: {self.adventure_time}'
            f'\n\nDeepness: {self.dungeon_deepness}'
            f'\n\nGame Over: {self.game_over}')
        return response_view
def set_description_limit(limit):  # HBD: We modify the class definition in runtime.
    """Update "_response_format" class to set a new "scene_description" max length.

    Pydantic v2 builds its validation schema when the class is created, so
    after mutating the field metadata the model must be rebuilt for the new
    limit to actually take effect.
    """
    _response_format.model_fields['scene_description'].metadata[0] = MaxLen(limit)
    # Fix: without a forced rebuild the mutated metadata is never applied.
    _response_format.model_rebuild(force=True)
# Function definition.
def narrate(message, history, system_message, client=CLIENT, model=MODEL):
    """Chat with the game engine.

    Sends the system prompt, the prior history and the player's message, and
    parses the reply into `_response_format`. If the model answers with tool
    calls, the first call is executed via `handle_tool_call` and a follow-up
    request (without tools) produces the final parsed response.
    Returns the parsed `_response_format` instance.
    """
    messages = ([{"role": "system", "content": system_message}] + history
                + [{"role": "user", "content": message}])
    response = client.chat.completions.parse(model=model, messages=messages, tools=tools,
                                             response_format=_response_format)
    # Process tool calls.
    if response.choices[0].finish_reason == "tool_calls":
        # Rebinds `message` to the assistant message carrying the tool call.
        message = response.choices[0].message
        tool_response = handle_tool_call(message)
        messages.append(message)
        messages.append(tool_response)
        response = client.chat.completions.parse(model=model, messages=messages,
                                                 response_format=_response_format)
    # Return game's Master response.
    return response.choices[0].message.parsed

View File

@@ -0,0 +1,81 @@
"""AI Mastered Dungeon Extraction Game storyteller tools module WIP."""
from json import loads
from openai.types.chat import ChatCompletionMessage
from openai.types.chat import ChatCompletionMessageFunctionToolCall
from openai.types.chat.chat_completion_message_function_tool_call import Function
# Tools declaration for future use. (E.g. Tools may handle user status and inventory)
tools = []  # OpenAI tool schemas passed to chat.completions.parse; empty for now.
tools_map = {}  # This will map each tool with it's tool function.
# A tool call function.
def handle_tool_call(message: ChatCompletionMessage):
    """Tools call handler.

    Executes the FIRST tool call on `message` by looking its function up in
    `tools_map`, and returns the `{"role": "tool", ...}` reply to append to
    the conversation. Additional tool calls on the message are ignored, and
    an unknown tool name makes `tool_func` None (TypeError on call).
    """
    tool_call = message.tool_calls[0]
    arguments = loads(tool_call.function.arguments)
    print(f'\nFUNC CALL: {tool_call.function.name}({arguments})\n')
    # Get tool function and call with arguments.
    tool_func = tools_map.get(tool_call.function.name)
    tool_response = tool_func(**arguments)
    response = {"role": "tool", "content": tool_response, "tool_call_id": tool_call.id}
    return response
# Example tool schema for a scene-drawing function (not yet added to `tools`).
draw_signature = {
    "name": "draw_scene",
    "description": "Generate an image of the scene based on the description",
    "parameters": {
        "type": "object",
        "properties": {
            "scene_description": {
                "type": "string",
                "description": "A detailed description of the scene to be drawn",
            },
            "scene_style": {
                "type": "string",
                "description": "The art style for the image",
            },
        },
        # NOTE(review): the sample call below passes "style", not "scene_style"
        # - confirm the intended property name before registering this tool.
        "required": ["scene_description"],
        "additionalProperties": False,
    },
}
# Tool call response example, kept for reference while tool support is WIP.
# Fix: previously this expression was built and immediately DISCARDED at
# import time; binding it to a module constant makes the intent explicit.
_EXAMPLE_TOOL_CALL_RESPONSE = ChatCompletionMessage(
    content="""To begin, first I need to set a scene.
Imagine you are in a dark room of an old castle.
The walls are covered in cobwebs and there is a smell of mold in the air.
As you look around, you notice a slightly ajar door to the north
and a dark figure lurking in the corner.
I am going to generate an image of this scene. One moment, please.""",
    refusal=None,
    role="assistant",
    annotations=[],
    audio=None,
    function_call=None,
    tool_calls=[
        ChatCompletionMessageFunctionToolCall(
            id="call_oJqJeXMUPZUaC0GPfMeSd16E",
            function=Function(
                arguments='''{
"scene_description":"A dark room in an ancient castle.
The walls are covered with cobwebs, and there\'s a musty smell in
the air.
A slightly ajar door to the north and a shadowy figure lurking in
the corner.
Dim lighting adds to the eerie atmosphere, with flickering shadows.",
"style":"fantasy"
}''',
                name="draw_scene"),
            type="function",
        )
    ],
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 278 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 895 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 892 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 793 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 162 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 212 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 247 KiB

View File

@@ -0,0 +1,101 @@
# Fitness & Nutrition Planner Agent (Community Contribution)
A tool-using agent that generates a **7-day vegetarian-friendly meal plan** with **calorie/macro targets** and a **consolidated grocery list**. It supports **targeted swaps** (e.g., "swap Tuesday lunch") while honoring dietary patterns, allergies, and dislikes.
> **Disclaimer**: This project is for educational purposes and is **not** medical advice. Consult a licensed professional for medical or specialized dietary needs.
---
## ✨ Features
- Calculates **TDEE** and **macro targets** via Mifflin–St Jeor + activity factors.
- Builds a **7-day plan** (breakfast/lunch/dinner) respecting dietary constraints.
- Produces an aggregated **grocery list** for the week.
- Supports **swap** of any single meal while keeping macros reasonable.
- Minimal **Streamlit UI** for demos.
- Extensible **tool-based architecture** to plug real recipe APIs/DBs.
---
## 🧱 Architecture
- **Agent core**: OpenAI function-calling (tools) with a simple orchestration loop.
- **Tools**:
1. `calc_calories_and_macros` computes targets.
2. `compose_meal_plan` creates the 7-day plan.
3. `grocery_list_from_plan` consolidates ingredients/quantities.
4. `swap_meal` replaces one meal (by kcal proximity and constraints).
- **Recipe source**: a tiny in-memory recipe DB for demo; replace with a real API or your own dataset.
---
## 🚀 Quickstart
### 1) Install
```bash
pip install openai streamlit pydantic python-dotenv
```
### 2) Configure
Create a `.env` file in this folder:
```
OPENAI_API_KEY=your_key_here
OPENAI_MODEL=gpt-4o-mini
```
### 3) Run CLI (example)
```bash
python agent.py
```
### 4) Run UI
```bash
streamlit run app.py
```
---
## 🧪 Sample Profile (from issue author)
See `sample_profile.json` for the exact values used to produce `demo_output.md`.
- **Sex**: female
- **Age**: 45
- **Height**: 152 cm (~5 ft)
- **Weight**: 62 kg
- **Activity**: light
- **Goal**: maintain
- **Diet**: vegetarian
---
## 🔧 Extend
- Replace the in-memory recipes with:
- A real **recipe API** (e.g., Spoonacular) or
- Your **own dataset** (CSV/DB) + filters/tags
- Add price lookups to produce a **budget-aware** grocery list.
- Add **adherence tracking** and charts.
- Integrate **wearables** or daily steps to refine TDEE dynamically.
- Add **snacks** for days slightly under target kcals.
---
## 🛡️ Safety Notes
- The agent warns for extreme deficits but does **not** diagnose conditions.
- For calorie targets below commonly recommended minimums (e.g., ~1200 kcal/day for many adults), advise consulting a professional.
---
## 📁 Project Layout
```
fitness-nutrition-planner-agent/
├─ README.md
├─ agent.py
├─ app.py
├─ sample_profile.json
└─ demo_output.md
```
---
## 🤝 How to contribute
- Keep notebooks (if any) with **cleared outputs**.
- Follow the course repo's contribution guidelines.
- Include screenshots or a short Loom/YT demo link in your PR description.

View File

@@ -0,0 +1,411 @@
# agent.py
import os, math, json, copy
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple
from pydantic import BaseModel, Field, ValidationError
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
# ------------------------------
# Data models
# ------------------------------
class UserProfile(BaseModel):
    """Everything the planner knows about the user: body stats, activity,
    goal, and food constraints. Validated by pydantic on construction."""
    sex: str = Field(..., description="male or female")
    age: int
    height_cm: float
    weight_kg: float
    activity_level: str = Field(..., description="sedentary, light, moderate, active, very_active")
    goal: str = Field(..., description="lose, maintain, gain")
    dietary_pattern: Optional[str] = Field(None, description="e.g., vegetarian, vegan, halal, kosher")
    allergies: List[str] = Field(default_factory=list)   # substring-matched against meal names/ingredients/tags
    dislikes: List[str] = Field(default_factory=list)    # substring-matched against meal names/ingredients
    daily_meals: int = 3  # number of meal slots generated per day
    cuisine_prefs: List[str] = Field(default_factory=list)
    time_per_meal_minutes: int = 30
    budget_level: Optional[str] = Field(None, description="low, medium, high")
class MacroTargets(BaseModel):
    """Daily energy and macro numbers (kcal; grams for the macro fields).

    Also reused as DayPlan.totals, where `target_kcal` holds the day's
    actual kcal sum rather than a target (see compose_meal_plan).
    """
    tdee: int         # total daily energy expenditure estimate
    target_kcal: int  # goal-adjusted daily calories
    protein_g: int
    carbs_g: int
    fat_g: int
class Meal(BaseModel):
    """One recipe with fixed nutrition facts and the tags used for filtering."""
    name: str
    ingredients: List[Dict[str, Any]] # {item, qty, unit}
    kcal: int
    protein_g: int
    carbs_g: int
    fat_g: int
    tags: List[str] = Field(default_factory=list)  # diet pattern / slot / prep-time markers
    instructions: Optional[str] = None
class DayPlan(BaseModel):
    """One day's meals plus their summed nutrition.

    NOTE: `totals` reuses MacroTargets; its target_kcal carries the day's
    actual kcal total, not a target (see compose_meal_plan).
    """
    day: str
    meals: List[Meal]
    totals: MacroTargets
class WeekPlan(BaseModel):
    """Seven DayPlans plus plan-level metadata (e.g. per_meal_target_kcal)."""
    days: List[DayPlan]
    meta: Dict[str, Any]
# ------------------------------
# Tiny in-memory recipe “DB”
# (extend/replace with a real source)
# ------------------------------
# Tags drive filtering: _allowed keys off "fish"/"poultry"/"dairy" for diet
# patterns, and compose_meal_plan's slot pools use "breakfast"/"lunch"/
# "dinner" etc. kcal/macro numbers are fixed per entry.
RECIPE_DB: List[Meal] = [
    Meal(
        name="Greek Yogurt Parfait",
        ingredients=[{"item":"nonfat greek yogurt","qty":200,"unit":"g"},
                     {"item":"berries","qty":150,"unit":"g"},
                     {"item":"granola","qty":30,"unit":"g"},
                     {"item":"honey","qty":10,"unit":"g"}],
        kcal=380, protein_g=30, carbs_g=52, fat_g=8,
        tags=["vegetarian","breakfast","5-min","no-cook"]
    ),
    Meal(
        name="Tofu Veggie Stir-Fry with Rice",
        ingredients=[{"item":"firm tofu","qty":150,"unit":"g"},
                     {"item":"mixed vegetables","qty":200,"unit":"g"},
                     {"item":"soy sauce (low sodium)","qty":15,"unit":"ml"},
                     {"item":"olive oil","qty":10,"unit":"ml"},
                     {"item":"brown rice (cooked)","qty":200,"unit":"g"}],
        kcal=650, protein_g=28, carbs_g=85, fat_g=20,
        tags=["vegan","gluten-free","dinner","20-min","stovetop","soy"]
    ),
    Meal(
        name="Chicken Quinoa Bowl",
        ingredients=[{"item":"chicken breast","qty":140,"unit":"g"},
                     {"item":"quinoa (cooked)","qty":185,"unit":"g"},
                     {"item":"spinach","qty":60,"unit":"g"},
                     {"item":"olive oil","qty":10,"unit":"ml"},
                     {"item":"lemon","qty":0.5,"unit":"unit"}],
        kcal=620, protein_g=45, carbs_g=55, fat_g=20,
        tags=["gluten-free","dinner","25-min","high-protein","poultry"]
    ),
    Meal(
        name="Lentil Soup + Wholegrain Bread",
        ingredients=[{"item":"lentils (cooked)","qty":200,"unit":"g"},
                     {"item":"vegetable broth","qty":400,"unit":"ml"},
                     {"item":"carrot","qty":80,"unit":"g"},
                     {"item":"celery","qty":60,"unit":"g"},
                     {"item":"onion","qty":60,"unit":"g"},
                     {"item":"wholegrain bread","qty":60,"unit":"g"}],
        kcal=520, protein_g=25, carbs_g=78, fat_g=8,
        tags=["vegan","lunch","30-min","budget"]
    ),
    Meal(
        name="Salmon, Potatoes & Greens",
        ingredients=[{"item":"salmon fillet","qty":150,"unit":"g"},
                     {"item":"potatoes","qty":200,"unit":"g"},
                     {"item":"broccoli","qty":150,"unit":"g"},
                     {"item":"olive oil","qty":10,"unit":"ml"}],
        kcal=680, protein_g=42, carbs_g=52, fat_g=30,
        tags=["gluten-free","dinner","omega-3","fish"]
    ),
    Meal(
        name="Cottage Cheese Bowl",
        ingredients=[{"item":"low-fat cottage cheese","qty":200,"unit":"g"},
                     {"item":"pineapple","qty":150,"unit":"g"},
                     {"item":"chia seeds","qty":15,"unit":"g"}],
        kcal=380, protein_g=32, carbs_g=35, fat_g=10,
        tags=["vegetarian","snack","5-min","high-protein","dairy"]
    ),
]
# ------------------------------
# Tool implementations
# ------------------------------
# Multipliers applied to BMR to estimate TDEE, keyed by UserProfile.activity_level.
# Unknown levels fall back to 1.2 in compute_targets.
ACTIVITY_FACTORS = {
    "sedentary": 1.2,
    "light": 1.375,
    "moderate": 1.55,
    "active": 1.725,
    "very_active": 1.9
}
def mifflin_st_jeor(weight_kg: float, height_cm: float, age: int, sex: str) -> float:
    """Estimate basal metabolic rate (kcal/day) using the Mifflin-St Jeor equation.

    The shared term is 10*weight + 6.25*height - 5*age; the constant offset
    is +5 for males and -161 otherwise (any sex not starting with "m").
    """
    base = 10 * weight_kg + 6.25 * height_cm - 5 * age
    offset = 5 if sex.lower().startswith("m") else -161
    return base + offset
def compute_targets(profile: UserProfile) -> MacroTargets:
    """Derive TDEE, a goal-adjusted calorie target, and a 30/40/30 macro split.

    Returns a MacroTargets with integer kcal and gram values.
    """
    bmr = mifflin_st_jeor(profile.weight_kg, profile.height_cm, profile.age, profile.sex)
    # Unknown activity levels default to the sedentary multiplier.
    factor = ACTIVITY_FACTORS.get(profile.activity_level, 1.2)
    tdee = int(round(bmr * factor))
    # Goal adjustment: conservative 400 kcal deficit (floored at 1200 kcal/day)
    # for weight loss, +300 kcal surplus for gain, TDEE as-is for maintenance.
    if profile.goal == "lose":
        target_kcal = max(1200, int(tdee - 400))
    elif profile.goal == "gain":
        target_kcal = int(tdee + 300)
    else:
        target_kcal = tdee
    # 30% protein / 40% carbs / 30% fat; 4 kcal per gram of protein or carbs,
    # 9 kcal per gram of fat.
    protein_g = int(round(target_kcal * 0.30 / 4))
    carbs_g = int(round(target_kcal * 0.40 / 4))
    fat_g = int(round(target_kcal * 0.30 / 9))
    return MacroTargets(
        tdee=tdee,
        target_kcal=target_kcal,
        protein_g=protein_g,
        carbs_g=carbs_g,
        fat_g=fat_g,
    )
def _allowed(meal: Meal, profile: UserProfile) -> bool:
    """Return True if `meal` passes the profile's diet/allergy/dislike filters.

    Matching is simple case-insensitive substring search against the meal
    name, ingredient item names, and (for allergies only) the tag list.
    Extend for a real product.
    """
    diet = (profile.dietary_pattern or "").lower()
    # Dietary patterns are enforced via RECIPE_DB tags.
    if diet == "vegetarian" and ("fish" in meal.tags or "poultry" in meal.tags):
        return False
    if diet == "vegan" and ("dairy" in meal.tags or "fish" in meal.tags or "poultry" in meal.tags):
        return False
    # BUG FIX: previously only the name check was guarded by `if a` / `if d`,
    # so an empty allergy or dislike string fell through to the ingredient
    # check, where `"" in s` is always True and every meal was rejected.
    # Empty entries are now skipped entirely.
    tag_text = " ".join(meal.tags).lower()
    for a in profile.allergies:
        if not a:
            continue
        needle = a.lower()
        if needle in meal.name.lower():
            return False
        if any(needle in ing["item"].lower() for ing in meal.ingredients):
            return False
        if needle in tag_text:
            return False
    for d in profile.dislikes:
        if not d:
            continue
        needle = d.lower()
        if needle in meal.name.lower():
            return False
        # Dislikes intentionally do not check tags (matches original behavior).
        if any(needle in ing["item"].lower() for ing in meal.ingredients):
            return False
    return True
def meal_db_search(profile: UserProfile, tags: Optional[List[str]] = None) -> List[Meal]:
    """Return every RECIPE_DB meal allowed for `profile`.

    When `tags` is given and non-empty, a meal must also carry at least one
    of the requested tags. The result may be empty; callers must handle that.
    """
    wanted = tags or []
    matches: List[Meal] = []
    for candidate in RECIPE_DB:
        if not _allowed(candidate, profile):
            continue
        if wanted and not any(t in candidate.tags for t in wanted):
            continue
        matches.append(candidate)
    return matches
def compose_meal_plan(profile: UserProfile, targets: MacroTargets) -> WeekPlan:
    """Build a 7-day plan by picking, per slot, the allowed meal closest in
    kcal to the per-meal budget (target_kcal / daily_meals).

    Slot 0 draws from a breakfast-ish pool, slot 1 from lunches, and every
    later slot from dinners; any pool with fewer than two options falls back
    to the full allowed set. Returns a WeekPlan whose per-day `totals` hold
    the actual summed kcal/macros.
    """
    per_meal_kcal = targets.target_kcal / profile.daily_meals
    weekdays = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]
    # Candidate pools per slot, selected by tag heuristics.
    breakfasts = meal_db_search(profile, tags=["breakfast","no-cook","5-min"])
    lunches = meal_db_search(profile, tags=["lunch","budget"])
    dinners = meal_db_search(profile, tags=["dinner","high-protein"])
    # Fall back to all allowed meals if a pool is too small for any choice.
    allowed_all = meal_db_search(profile)
    if len(breakfasts) < 2: breakfasts = allowed_all
    if len(lunches) < 2: lunches = allowed_all
    if len(dinners) < 2: dinners = allowed_all
    days = []
    for day in weekdays:  # fixed: dropped unused enumerate() index
        day_meals = []
        for slot in range(profile.daily_meals):
            pool = breakfasts if slot == 0 else (lunches if slot == 1 else dinners)
            # Closest-kcal match to the per-meal budget. Deep copy so later
            # swaps cannot mutate the shared RECIPE_DB entries.
            pick = min(pool, key=lambda m: abs(m.kcal - per_meal_kcal))
            day_meals.append(copy.deepcopy(pick))
        # Day totals; target_kcal is (re)used to carry the actual kcal sum.
        kcal = sum(m.kcal for m in day_meals)
        day_totals = MacroTargets(
            tdee=targets.tdee,
            target_kcal=int(round(kcal)),
            protein_g=sum(m.protein_g for m in day_meals),
            carbs_g=sum(m.carbs_g for m in day_meals),
            fat_g=sum(m.fat_g for m in day_meals),
        )
        days.append(DayPlan(day=day, meals=day_meals, totals=day_totals))
    return WeekPlan(days=days, meta={"per_meal_target_kcal": int(round(per_meal_kcal))})
def grocery_list_from_plan(plan: WeekPlan) -> List[Dict[str, Any]]:
    """Aggregate identical ingredients across the whole week.

    Ingredients are keyed by (lowercased item name, unit), so the same item
    in different units stays separate. Returns a sorted list of
    {"item", "qty", "unit"} dicts with quantities rounded to 2 decimals.
    """
    agg: Dict[Tuple[str, str], float] = {}
    for day in plan.days:
        for meal in day.meals:
            for ing in meal.ingredients:
                key = (ing["item"].lower(), ing.get("unit",""))
                agg[key] = agg.get(key, 0) + float(ing.get("qty", 0))
    # (Removed a dead `units` dict that duplicated the unit already in the key.)
    return [
        {"item": item, "qty": round(qty, 2), "unit": unit}
        for (item, unit), qty in sorted(agg.items())
    ]
def swap_meal(plan: WeekPlan, day: str, meal_index: int, profile: UserProfile) -> WeekPlan:
    """Replace one meal in `plan` (mutated in place) with the closest-kcal
    allowed alternative, then recompute that day's totals.

    `day` is matched on its first three letters, case-insensitively, so
    "Mon", "mon" and "Monday" all hit the same DayPlan. Returns the plan
    unchanged if the day is not found or no alternative exists.
    NOTE(review): `meal_index` is not bounds-checked; an out-of-range value
    raises IndexError.
    """
    # replace one meal by closest-kcal allowed alternative that isn't the same
    day_idx = next((i for i,d in enumerate(plan.days) if d.day.lower().startswith(day[:3].lower())), None)
    if day_idx is None: return plan
    current_meal = plan.days[day_idx].meals[meal_index]
    # Any allowed meal with a different name is a candidate.
    candidates = [m for m in meal_db_search(profile) if m.name != current_meal.name]
    if not candidates: return plan
    # Closest kcal keeps the day's total roughly stable.
    pick = min(candidates, key=lambda m: abs(m.kcal - current_meal.kcal))
    plan.days[day_idx].meals[meal_index] = copy.deepcopy(pick)
    # recalc day totals
    d = plan.days[day_idx]
    kcal = sum(m.kcal for m in d.meals)
    p = sum(m.protein_g for m in d.meals)
    c = sum(m.carbs_g for m in d.meals)
    f = sum(m.fat_g for m in d.meals)
    d.totals = MacroTargets(tdee=d.totals.tdee, target_kcal=kcal, protein_g=p, carbs_g=c, fat_g=f)
    return plan
# ------------------------------
# Agent (LLM + tools)
# ------------------------------
# System prompt establishing the agent persona, tool-use expectations, and
# safety guardrails. Sent as the first message in every chat() call.
SYS_PROMPT = """You are FitnessPlanner, an agentic planner that:
- Respects dietary patterns, allergies, dislikes, budget, time limits.
- Uses tools to compute targets, assemble a 7-day plan, produce a grocery list, and swap meals on request.
- If a request is unsafe (extreme deficits, medical conditions), warn and suggest professional guidance.
- Keep responses concise and structured (headings + bullet lists)."""
# Tool registry for function-calling
def get_tools_schema():
    """Return the OpenAI function-calling specs for the planner's four tools.

    Tool names must match the dispatch branches in
    FitnessPlannerAgent._call_tool.
    """
    def _tool(name, description, properties, required):
        # Shared shape for a single function-calling tool definition.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            },
        }
    return [
        _tool(
            "calc_calories_and_macros",
            "Compute TDEE and macro targets from the user's profile.",
            {"profile": {"type": "object"}},
            ["profile"],
        ),
        _tool(
            "compose_meal_plan",
            "Create a 7-day meal plan matching targets and constraints.",
            {"profile": {"type": "object"}, "targets": {"type": "object"}},
            ["profile", "targets"],
        ),
        _tool(
            "grocery_list_from_plan",
            "Make a consolidated grocery list from a week plan.",
            {"plan": {"type": "object"}},
            ["plan"],
        ),
        _tool(
            "swap_meal",
            "Swap a single meal in the plan while keeping macros reasonable.",
            {
                "plan": {"type": "object"},
                "day": {"type": "string"},
                "meal_index": {"type": "integer", "description": "0=breakfast,1=lunch,2=dinner"},
                "profile": {"type": "object"},
            },
            ["plan", "day", "meal_index", "profile"],
        ),
    ]
class FitnessPlannerAgent:
    """Orchestrates OpenAI chat completions with local tool dispatch.

    Caches the latest plan/targets so the Streamlit UI (app.py) can mutate
    them directly via _call_tool("swap_meal", ...).
    """
    def __init__(self, model: Optional[str] = None):
        # Model resolution: explicit argument > OPENAI_MODEL env > "gpt-4o-mini".
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = model or os.getenv("OPENAI_MODEL", "gpt-4o-mini")
        self.plan_cache: Optional[WeekPlan] = None
        self.targets_cache: Optional[MacroTargets] = None
    # Tool dispatch
    def _call_tool(self, name: str, args: Dict[str, Any]) -> str:
        """Run one named tool with already-decoded args; return a JSON string.

        Side effect: refreshes plan_cache / targets_cache so later calls can
        reuse the newest state. Unknown names yield an error payload instead
        of raising.
        """
        if name == "calc_calories_and_macros":
            profile = UserProfile(**args["profile"])
            targets = compute_targets(profile)
            self.targets_cache = targets
            return targets.model_dump_json()
        elif name == "compose_meal_plan":
            profile = UserProfile(**args["profile"])
            targets = MacroTargets(**args["targets"])
            plan = compose_meal_plan(profile, targets)
            self.plan_cache = plan
            return plan.model_dump_json()
        elif name == "grocery_list_from_plan":
            plan = WeekPlan(**args["plan"])
            items = grocery_list_from_plan(plan)
            return json.dumps(items)
        elif name == "swap_meal":
            plan = WeekPlan(**args["plan"])
            profile = UserProfile(**args["profile"])
            day = args["day"]
            idx = args["meal_index"]
            new_plan = swap_meal(plan, day, idx, profile)
            self.plan_cache = new_plan
            return new_plan.model_dump_json()
        else:
            return json.dumps({"error":"unknown tool"})
    def chat(self, user_message: str, profile: Optional[UserProfile] = None) -> str:
        """Handle one user turn: call the model with tools enabled, execute
        any requested tool calls, then make one follow-up call so the model
        can summarize the tool output.

        NOTE(review): only a single round of tool calls is executed; if the
        model requests further tools after seeing results, that second round
        is not handled.
        """
        messages = [{"role":"system","content":SYS_PROMPT}]
        if profile:
            # Profile is passed in-band as a user message containing its JSON dump.
            messages.append({"role":"user","content":f"User profile: {profile.model_dump_json()}"} )
        messages.append({"role":"user","content":user_message})
        # First call
        resp = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            tools=get_tools_schema(),
            tool_choice="auto",
            temperature=0.3
        )
        # Handle tool calls (simple, single-step or brief multi-step)
        # NOTE(review): tool_calls here may be the SDK's message objects rather
        # than plain dicts - confirm the client serializes them on resend.
        messages_llm = messages + [{"role":"assistant","content":resp.choices[0].message.content or "",
                                    "tool_calls":resp.choices[0].message.tool_calls}]
        if resp.choices[0].message.tool_calls:
            for tc in resp.choices[0].message.tool_calls:
                name = tc.function.name
                args = json.loads(tc.function.arguments or "{}")
                out = self._call_tool(name, args)
                messages_llm.append({
                    "role":"tool",
                    "tool_call_id":tc.id,
                    "name":name,
                    "content":out
                })
            # Finalization
            resp2 = self.client.chat.completions.create(
                model=self.model,
                messages=messages_llm,
                temperature=0.2
            )
            return resp2.choices[0].message.content
        return resp.choices[0].message.content
# ------------------------------
# Quick CLI demo
# ------------------------------
if __name__ == "__main__":
    # Demo profile mirroring sample_profile.json (see README "Sample Profile").
    profile = UserProfile(
        sex="female", age=45, height_cm=152, weight_kg=62,
        activity_level="light", goal="maintain",
        dietary_pattern="vegetarian", allergies=[], dislikes=[],
        daily_meals=3, cuisine_prefs=["mediterranean"], time_per_meal_minutes=25, budget_level="medium"
    )
    agent = FitnessPlannerAgent()
    # One-shot request: the agent is expected to call its tools, then print a summary.
    print(agent.chat("Create my 7-day plan and grocery list.", profile))

View File

@@ -0,0 +1,75 @@
# app.py
# Streamlit front end for the FitnessPlannerAgent.
# NOTE(review): `agent` is re-instantiated on every Streamlit rerun, so
# agent.plan_cache is empty by the time "Swap Meal" is clicked; the
# `swap_btn and agent.plan_cache` guard below will not fire unless the agent
# or plan is kept in st.session_state - confirm and fix in a follow-up.
import json
import streamlit as st
from agent import FitnessPlannerAgent, UserProfile, WeekPlan
st.set_page_config(page_title="Fitness & Nutrition Planner Agent", layout="wide")
st.title("🏋️ Fitness & Nutrition Planner Agent")
with st.sidebar:
    # Profile inputs; defaults mirror sample_profile.json.
    st.header("Your Profile")
    sex = st.selectbox("Sex", ["female","male"])
    age = st.number_input("Age", 18, 90, 45)
    height_cm = st.number_input("Height (cm)", 120, 220, 152)
    weight_kg = st.number_input("Weight (kg)", 35.0, 200.0, 62.0)
    activity_level = st.selectbox("Activity Level", ["sedentary","light","moderate","active","very_active"], index=1)
    goal = st.selectbox("Goal", ["lose","maintain","gain"], index=1)
    dietary_pattern = st.selectbox("Dietary Pattern", ["none","vegetarian","vegan","halal","kosher"], index=1)
    if dietary_pattern == "none": dietary_pattern = None
    allergies = st.text_input("Allergies (comma-separated)", "")
    dislikes = st.text_input("Dislikes (comma-separated)", "")
    daily_meals = st.slider("Meals per day", 2, 5, 3)
    time_per_meal_minutes = st.slider("Time per meal (min)", 5, 90, 25)
    budget_level = st.selectbox("Budget", ["medium","low","high"], index=0)
    cuisine_prefs = st.text_input("Cuisine prefs (comma-separated)", "mediterranean")
    build_btn = st.button("Generate 7-Day Plan")
agent = FitnessPlannerAgent()
if build_btn:
    # Build a validated profile from the sidebar widget values.
    profile = UserProfile(
        sex=sex, age=int(age), height_cm=float(height_cm), weight_kg=float(weight_kg),
        activity_level=activity_level, goal=goal, dietary_pattern=dietary_pattern,
        allergies=[a.strip() for a in allergies.split(",") if a.strip()],
        dislikes=[d.strip() for d in dislikes.split(",") if d.strip()],
        daily_meals=int(daily_meals), cuisine_prefs=[c.strip() for c in cuisine_prefs.split(",") if c.strip()],
        time_per_meal_minutes=int(time_per_meal_minutes), budget_level=budget_level
    )
    # Persist the profile JSON so the swap flow can rebuild it after a rerun.
    st.session_state["profile_json"] = profile.model_dump_json()
    with st.spinner("Planning your week..."):
        result = agent.chat("Create my 7-day plan and grocery list.", profile)
        st.session_state["last_response"] = result
if "last_response" in st.session_state:
    st.subheader("Plan & Groceries")
    st.markdown(st.session_state["last_response"])
    st.divider()
    st.subheader("Tweaks")
    col1, col2, col3 = st.columns(3)
    with col1:
        day = st.selectbox("Day to change", ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"])
    with col2:
        meal_index = st.selectbox("Meal slot", ["Breakfast (0)","Lunch (1)","Dinner (2)"])
        meal_index = int(meal_index[-2]) # 0/1/2 - second-to-last char is the digit inside "(n)"
    with col3:
        swap_btn = st.button("Swap Meal")
    if swap_btn and agent.plan_cache:
        profile_json = st.session_state.get("profile_json")
        if not profile_json:
            st.warning("Please generate a plan first.")
        else:
            # Bypass the LLM: invoke the swap tool directly on the cached plan.
            new_plan_json = agent._call_tool("swap_meal", {
                "plan": agent.plan_cache.model_dump(),
                "day": day,
                "meal_index": meal_index,
                "profile": json.loads(profile_json)
            })
            agent.plan_cache = WeekPlan(**json.loads(new_plan_json))
            # Ask the model for a short human-readable update of the changed day.
            summary = agent.chat(f"Update summary for {day}: show the swapped meal and new day totals.")
            st.session_state["last_response"] = summary
            st.markdown(summary)

View File

@@ -0,0 +1,84 @@
# Demo Output (Sample Profile)
**Profile**: female, 45, 152 cm, 62 kg, activity: light, goal: maintain, diet: vegetarian
## Targets
- TDEE ≈ **1680 kcal/day**
- Macros (30/40/30): **Protein 126 g**, **Carbs 168 g**, **Fat 56 g**
> These are estimates using Mifflin-St Jeor and a light activity factor. Not medical advice.
---
## Example 7-Day Plan (Breakfast / Lunch / Dinner)
**Mon**
- Greek Yogurt Parfait (380 kcal, 30P/52C/8F)
- Lentil Soup + Wholegrain Bread (520 kcal, 25P/78C/8F)
- Tofu Veggie Stir-Fry with Rice (650 kcal, 28P/85C/20F)
- **Totals** ≈ 1550 kcal, 83P, 215C, 36F
**Tue**
- Cottage Cheese Bowl (380 kcal, 32P/35C/10F)
- Lentil Soup + Wholegrain Bread (520 kcal, 25P/78C/8F)
- Tofu Veggie Stir-Fry with Rice (650 kcal, 28P/85C/20F)
- **Totals** ≈ 1550 kcal, 85P, 198C, 38F
**Wed**
- Greek Yogurt Parfait
- Lentil Soup + Wholegrain Bread
- Tofu Veggie Stir-Fry with Rice
- **Totals** ≈ 1550 kcal
**Thu**
- Cottage Cheese Bowl
- Lentil Soup + Wholegrain Bread
- Tofu Veggie Stir-Fry with Rice
- **Totals** ≈ 1550 kcal
**Fri**
- Greek Yogurt Parfait
- Lentil Soup + Wholegrain Bread
- Tofu Veggie Stir-Fry with Rice
- **Totals** ≈ 1550 kcal
**Sat**
- Cottage Cheese Bowl
- Lentil Soup + Wholegrain Bread
- Tofu Veggie Stir-Fry with Rice
- **Totals** ≈ 1550 kcal
**Sun**
- Greek Yogurt Parfait
- Lentil Soup + Wholegrain Bread
- Tofu Veggie Stir-Fry with Rice
- **Totals** ≈ 1550 kcal
> Notes: The demo DB is intentionally small. In practice, plug in a larger vegetarian recipe set for more variety. Add snacks if you'd like to reach ~1680 kcal/day.
---
## Grocery List (aggregated, approx for 7 days)
- nonfat greek yogurt — **1400 g**
- berries — **1050 g**
- granola — **210 g**
- honey — **70 g**
- lentils (cooked) — **1400 g**
- vegetable broth — **2800 ml**
- carrot — **560 g**
- celery — **420 g**
- onion — **420 g**
- wholegrain bread — **420 g**
- firm tofu — **1050 g**
- mixed vegetables — **1400 g**
- soy sauce (low sodium) — **105 ml**
- olive oil — **140 ml**
- brown rice (cooked) — **1400 g**
- low-fat cottage cheese — **600 g**
- pineapple — **450 g**
- chia seeds — **45 g**
**Tip:** Use the app's *Swap Meal* to replace any item (e.g., swap Wed dinner).

View File

@@ -0,0 +1,17 @@
{
"sex": "female",
"age": 45,
"height_cm": 152,
"weight_kg": 62,
"activity_level": "light",
"goal": "maintain",
"dietary_pattern": "vegetarian",
"allergies": [],
"dislikes": [],
"daily_meals": 3,
"cuisine_prefs": [
"mediterranean"
],
"time_per_meal_minutes": 25,
"budget_level": "medium"
}

View File

@@ -0,0 +1,428 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Welcome to your first assignment!\n",
"\n",
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
]
},
{
"cell_type": "markdown",
"id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">Just before we get to the assignment --</h2>\n",
" <span style=\"color:#f71;\">I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.<br/>\n",
" <a href=\"https://edwarddonner.com/2024/11/13/llm-engineering-resources/\">https://edwarddonner.com/2024/11/13/llm-engineering-resources/</a><br/>\n",
" Please keep this bookmarked, and I'll continue to add more useful links there over time.\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
"metadata": {},
"source": [
"# HOMEWORK EXERCISE ASSIGNMENT\n",
"\n",
"Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n",
"\n",
"You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n",
"\n",
"**Benefits:**\n",
"1. No API charges - open-source\n",
"2. Data doesn't leave your box\n",
"\n",
"**Disadvantages:**\n",
"1. Significantly less power than Frontier Model\n",
"\n",
"## Recap on installation of Ollama\n",
"\n",
"Simply visit [ollama.com](https://ollama.com) and install!\n",
"\n",
"Once complete, the ollama server should already be running locally. \n",
"If you visit: \n",
"[http://localhost:11434/](http://localhost:11434/)\n",
"\n",
"You should see the message `Ollama is running`. \n",
"\n",
"If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n",
"And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n",
"Then try [http://localhost:11434/](http://localhost:11434/) again.\n",
"\n",
"If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
"metadata": {},
"outputs": [],
"source": [
"# Create a messages list using the same format that we used for OpenAI\n",
"\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
"metadata": {},
"outputs": [],
"source": [
"# Let's just make sure the model is loaded\n",
"\n",
"!ollama pull llama3.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
"metadata": {},
"outputs": [],
"source": [
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
"# And if none of that works - contact me!\n",
"\n",
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
"metadata": {},
"source": [
"# Introducing the ollama package\n",
"\n",
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
"\n",
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
"metadata": {},
"source": [
"## Alternative approach - using OpenAI python library to connect to Ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
"metadata": {},
"outputs": [],
"source": [
"# There's actually an alternative approach that some people might prefer\n",
"# You can use the OpenAI client python library to call Ollama:\n",
"\n",
"from openai import OpenAI\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Are you confused about why that works?\n",
"\n",
"It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n",
"\n",
"Here's the scoop:\n",
"\n",
"The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n",
"\n",
"When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n",
"\n",
"Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n",
"\n",
"OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n",
"\n",
"So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n",
"And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n",
"\n",
"And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n",
"\n",
"That's it!\n",
"\n",
"So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n",
"Then this will make the same endpoint calls, but to Ollama instead of OpenAI."
]
},
{
"cell_type": "markdown",
"id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90",
"metadata": {},
"source": [
"## Also trying the amazing reasoning model DeepSeek\n",
"\n",
"Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. \n",
    "This is actually a 1.5B variant of Qwen that has been fine-tuned using synthetic data generated by Deepseek R1.\n",
"\n",
"Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d",
"metadata": {},
"outputs": [],
"source": [
"!ollama pull deepseek-r1:1.5b"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d3d554b-e00d-4c08-9300-45e073950a76",
"metadata": {},
"outputs": [],
"source": [
"# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside <think> tags, followed by some decent definitions\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=\"deepseek-r1:1.5b\",\n",
" messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
"metadata": {},
"source": [
"# NOW the exercise for you\n",
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
]
},
{
"cell_type": "markdown",
"id": "59e9564e",
"metadata": {},
"source": [
"1. Create a Website class"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d44c6179-5c00-46a1-a068-6c6226307e2f",
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
" \"user-agent\": \"mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/117.0.0.0 safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers = headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title if soup.title else \"No title found for this website\"\n",
" \n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e9edb034",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\"\n",
"\n",
"def getUserPrompt(website):\n",
" userPrompt = f\"You are looking at a website titled {website.title}\"\n",
" userPrompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" userPrompt += website.text\n",
" return userPrompt"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8581edb1",
"metadata": {},
"outputs": [],
"source": [
"def getPromptMessageFor(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": getUserPrompt(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "caf20d99",
"metadata": {},
"outputs": [],
"source": [
"def summarize(url):\n",
" website = Website(url)\n",
" ollamaAi = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
" response = ollamaAi.chat.completions.create(\n",
" model = \"deepseek-r1:1.5b\",\n",
" messages = getPromptMessageFor(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "5e0ed89f",
"metadata": {},
"outputs": [],
"source": [
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5793933",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,5 @@
# openai-twenty-questions
Chatgpt script that acts as Thinker and Guesser to play the 20 questions game
1. pip install openai==0.28
2. Run python twenty.py and it will Think of a word and try to guess it in 20 tries

View File

@@ -0,0 +1,100 @@
import openai
import os
import time
# openai.api_key = os.getenv("OPENAI_API_KEY")
# openai.api_key = "<<Your Open AI Key here>>"
# Models: You can use "gpt-4o", "gpt-4-turbo", or "gpt-3.5-turbo" — but we'll use "gpt-4o" or "gpt-4o-mini" for both players
MODEL = "gpt-4o-mini"
def call_chatgpt(messages):
    """Send `messages` to the chat model and return the stripped reply text."""
    completion = openai.ChatCompletion.create(
        model=MODEL, messages=messages, temperature=0.7
    )
    reply_text = completion.choices[0].message["content"]
    return reply_text.strip()
# Step 1: Thinker chooses a secret object
thinker_messages = [
    {"role": "system", "content": "You are playing 20 Questions. Think of an object or thing and just one word. Keep it secret and reply only with: 'I have thought of something. Let's begin.'"},
]
thinker_reply = call_chatgpt(thinker_messages)
print("Thinker:", thinker_reply)

# For simulation purposes, ask the thinker what the object is (in a real game this is hidden).
# NOTE(review): this is a fresh conversation, so the "revealed" object is chosen here,
# independently of the Step 1 call above -- confirm this is the intended behavior.
reveal_object_prompt = [
    {"role": "system", "content": "You are playing 20 Questions. Think of an object or thing and just one word. Now tell me (just for logging) what you are thinking of. Reply only with the thing."}
]
object_answer = call_chatgpt(reveal_object_prompt)
print("🔒 Secret Object:", object_answer)

# Step 2: Guesser starts asking questions
guesser_messages = [
    {"role": "system", "content": "You are playing 20 Questions. Ask yes/no questions to figure out what the object is. Do not repeat questions. The object is kept secret by the other player. Begin by asking your first question."},
]

history = []  # (speaker, text) log of the whole game
q_count = 1

# 10 iterations x 2 questions per iteration = up to 20 questions total
for i in range(1, 11):
    print(f"\n🔄 Round {q_count}")
    q_count += 1

    # Guesser asks a question
    question = call_chatgpt(guesser_messages)
    print("Guesser:", question)
    history.append(("Guesser", question))

    # Thinker responds (yes/no)
    thinker_round = [
        {"role": "system", "content": f"You are playing 20 Questions. The secret object is: {object_answer}."},
        {"role": "user", "content": f"The other player asked: {question}. Respond only with 'Yes', 'No', or 'I don't know'."}
    ]
    answer = call_chatgpt(thinker_round)
    print("Thinker:", answer)
    history.append(("Thinker", answer))

    # Add the exchange to the guesser's conversation history
    guesser_messages.append({"role": "assistant", "content": question})
    guesser_messages.append({"role": "user", "content": answer})

    print(f"\n🔄 Round {q_count}")
    q_count += 1

    # Offer the guesser a chance to guess instead of asking another question
    guess_check_prompt = guesser_messages + [
        {"role": "user", "content": "Based on the answers so far, do you want to guess? If yes, say: 'Is it <guess>?'. If not, ask the next yes/no question."}
    ]
    next_move_question = call_chatgpt(guess_check_prompt)
    print("Guesser next move:", next_move_question)
    history.append(("Guesser", next_move_question))

    # BUGFIX: the old check `startswith("is it a")` + slice [8:] mangled guesses like
    # "Is it an apple?" (sliced to "n apple") and missed "Is it water?" entirely.
    # Also removed the duplicate `guess = ...` line that dropped the '.' stripping.
    if next_move_question.lower().startswith("is it"):
        guess = next_move_question[5:].strip(" ?.")
        for article in ("a ", "an ", "the "):
            if guess.lower().startswith(article):
                guess = guess[len(article):]
                break
        if guess.lower() == object_answer.lower():
            print("Guesser guessed correctly!")
            break

    # Thinker responds (yes/no) to the question (or to an incorrect guess)
    thinker_round = [
        {"role": "system", "content": f"You are playing 20 Questions. The secret object is: {object_answer}."},
        {"role": "user", "content": f"The other player asked: {next_move_question}. Respond only with 'Yes', 'No', or 'I don't know'."}
    ]
    answer = call_chatgpt(thinker_round)
    print("Thinker next move:", answer)
    history.append(("Thinker", answer))

    # Add the exchange to the guesser's conversation history.
    # BUGFIX: the original appended `next_move_question` as an assistant message twice.
    guesser_messages.append({"role": "assistant", "content": next_move_question})
    guesser_messages.append({"role": "user", "content": answer})
else:
    # for/else: runs only when the loop finishes without a correct guess (no break)
    print("❌ Guesser used all 20 questions without guessing correctly.")

View File

@@ -0,0 +1,731 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# YOUR FIRST LAB\n",
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
"\n",
"## Your first Frontier LLM Project\n",
"\n",
"Let's build a useful LLM solution - in a matter of minutes.\n",
"\n",
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
"\n",
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
"\n",
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
"\n",
"## If you're new to Jupyter Lab\n",
"\n",
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
"\n",
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
"\n",
"## If you're new to the Command Line\n",
"\n",
"Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
"\n",
"## If you'd prefer to work in IDEs\n",
"\n",
"If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
"If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
"\n",
"## If you'd like to brush up your Python\n",
"\n",
"I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
"`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
"\n",
"## I am here to help\n",
"\n",
"If you have any problems at all, please do reach out. \n",
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
"And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
"\n",
"## More troubleshooting\n",
"\n",
"Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
"\n",
"## For foundational technical knowledge (eg Git, APIs, debugging) \n",
"\n",
"If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
"\n",
"This covers Git and GitHub; what they are, the difference, and how to use them: \n",
"https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
"\n",
"This covers technical foundations: \n",
"ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
"https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
"\n",
"This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
"https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
"\n",
"This covers the essential techniques for figuring out errors: \n",
"https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
"\n",
"And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
"\n",
"## If this is old hat!\n",
"\n",
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "markdown",
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
"metadata": {},
"source": [
"# Connecting to OpenAI (or Ollama)\n",
"\n",
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
"\n",
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
"\n",
"## Troubleshooting if you have problems:\n",
"\n",
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
"\n",
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
"\n",
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
"\n",
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Let's make a quick call to a Frontier model to get started, as a preview!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## OK onwards with our first project"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"    \"\"\"A scraped web page: holds its url, title and visible text content.\"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        # Some responses (e.g. error pages) have no <body>; guard to avoid an\n",
"        # AttributeError, matching the async Playwright version later in this repo.\n",
"        if soup.body:\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "markdown",
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
"metadata": {},
"source": [
"## Types of prompts\n",
"\n",
"You may know this already - but if not, you will get very familiar with it!\n",
"\n",
"Models like GPT4o have been trained to receive instructions in a particular way.\n",
"\n",
"They expect to receive:\n",
"\n",
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
"\n",
"**A user prompt** -- the conversation starter that they should reply to"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
"    \"\"\"Compose the user prompt asking for a markdown summary of `website`.\"\"\"\n",
"    instructions = (\n",
"        \"\\nThe contents of this website is as follows; \"\n",
"        \"please provide a short summary of this website in markdown. \"\n",
"        \"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
"    )\n",
"    return f\"You are looking at a website titled {website.title}\" + instructions + website.text"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "markdown",
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
"metadata": {},
"source": [
"## Messages\n",
"\n",
"The API from OpenAI expects to receive messages in a particular structure.\n",
"Many of the other APIs share this structure:\n",
"\n",
"```python\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
"]\n",
"```\n",
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with system and user messages:\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
"metadata": {},
"source": [
"## And now let's build useful messages for GPT-4o-mini, using a function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
"    \"\"\"Return the two-message chat payload (system + user) for `website`.\"\"\"\n",
"    system_message = {\"role\": \"system\", \"content\": system_prompt}\n",
"    user_message = {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
"    return [system_message, user_message]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
"metadata": {},
"outputs": [],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(ed)"
]
},
{
"cell_type": "markdown",
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
"metadata": {},
"source": [
"## Time to bring it together - the API for OpenAI is very simple!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
"    \"\"\"Scrape `url` and ask GPT-4o-mini for a markdown summary.\"\"\"\n",
"    website = Website(url)\n",
"    completion = openai.chat.completions.create(\n",
"        model=\"gpt-4o-mini\",\n",
"        messages=messages_for(website),\n",
"    )\n",
"    return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
"    \"\"\"Summarize `url` and render the result as Markdown in the notebook.\"\"\"\n",
"    display(Markdown(summarize(url)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "markdown",
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
"metadata": {},
"source": [
"# Let's try more websites\n",
"\n",
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
"\n",
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
"\n",
"Also Websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
"\n",
"But many websites will work just fine!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://anthropic.com\")"
]
},
{
"cell_type": "markdown",
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business applications</h2>\n",
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
"\n",
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
"system_prompt = \"something here\"\n",
"user_prompt = \"\"\"\n",
" Lots of text\n",
" Can be pasted here\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [] # fill this in\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response =\n",
"\n",
"# Step 4: print the result\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
"metadata": {},
"source": [
"## An extra exercise for those who enjoy web scraping\n",
"\n",
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
]
},
{
"cell_type": "markdown",
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
"metadata": {},
"source": [
"# Sharing your code\n",
"\n",
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
"\n",
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
"\n",
"Here are good instructions courtesy of an AI friend: \n",
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": [
"from playwright.sync_api import sync_playwright\n",
"import time \n",
"import asyncio\n",
"from playwright.async_api import async_playwright\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd3fdc92",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "44099289",
"metadata": {},
"outputs": [],
"source": [
"class Website:\n",
"    \"\"\"Async Playwright-backed scraper: holds the page's url, title and visible text.\n",
"\n",
"    Note: this shadows the earlier requests-based Website class in this notebook.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        self.url = url\n",
"        self.title = None\n",
"        self.text = None\n",
"\n",
"    @classmethod\n",
"    async def create(cls, url):\n",
"        \"\"\"Factory: build a Website and scrape it, retrying up to 3 times on timeout.\"\"\"\n",
"        website = cls(url)\n",
"        retries = 3 # Add retry logic\n",
"        for attempt in range(retries):\n",
"            try:\n",
"                await website.initialize()\n",
"                return website\n",
"            # NOTE(review): Playwright timeouts raise playwright.async_api.TimeoutError,\n",
"            # which is not the builtin TimeoutError caught here -- confirm retries fire.\n",
"            except TimeoutError as e:\n",
"                if attempt == retries - 1: # Last attempt\n",
"                    raise\n",
"                print(f\"Attempt {attempt + 1} failed, retrying...\")\n",
"                await asyncio.sleep(2) # Wait between retries\n",
"\n",
"    async def initialize(self):\n",
"        \"\"\"Launch headless Chromium, load self.url, and populate title/text.\"\"\"\n",
"        async with async_playwright() as p:\n",
"            # Launch with stealth mode settings\n",
"            browser = await p.chromium.launch(\n",
"                headless=True,\n",
"                args=[\n",
"                    '--disable-blink-features=AutomationControlled',\n",
"                    '--disable-dev-shm-usage',\n",
"                    '--no-sandbox'\n",
"                ]\n",
"            )\n",
"            \n",
"            # Create context with stealth settings\n",
"            context = await browser.new_context(\n",
"                user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',\n",
"                viewport={'width': 1920, 'height': 1080},\n",
"                java_script_enabled=True,\n",
"                bypass_csp=True, # Bypass Content Security Policy\n",
"                extra_http_headers={\n",
"                    'Accept-Language': 'en-US,en;q=0.9',\n",
"                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'\n",
"                }\n",
"            )\n",
"            \n",
"            page = await context.new_page()\n",
"            \n",
"            try:\n",
"                # Navigate with progressive waits\n",
"                await page.goto(self.url, timeout=90000) # 90 second timeout\n",
"                \n",
"                # Wait for either real content or Cloudflare challenge\n",
"                try:\n",
"                    # Wait for actual content first\n",
"                    await page.wait_for_selector('main', timeout=10000)\n",
"                # Bare except is a deliberate best-effort fallback for pages without <main>\n",
"                except:\n",
"                    # If main content not found, wait for Cloudflare to clear\n",
"                    await page.wait_for_load_state('networkidle', timeout=30000)\n",
"                    await page.wait_for_selector('body', state='visible', timeout=30000)\n",
"                \n",
"                # Get content after all waits\n",
"                self.title = await page.title()\n",
"                content = await page.content()\n",
"                \n",
"                soup = BeautifulSoup(content, 'html.parser')\n",
"                for irrelevant in soup.find_all([\"script\", \"style\", \"img\", \"input\"]):\n",
"                    irrelevant.decompose()\n",
"                self.text = soup.body.get_text(separator=\"\\n\", strip=True) if soup.body else \"\"\n",
"                \n",
"            # Always close the browser, even when a wait or parse step raised\n",
"            finally:\n",
"                await browser.close()\n",
"\n",
"# Modified summarize function to use the factory method\n",
"async def summarize(url):\n",
"    \"\"\"Scrape `url` with Playwright (async) and return a GPT-4o-mini summary.\"\"\"\n",
"    website = await Website.create(url)\n",
"    completion = openai.chat.completions.create(\n",
"        model=\"gpt-4o-mini\",\n",
"        messages=messages_for(website),\n",
"    )\n",
"    return completion.choices[0].message.content\n",
"\n",
"# Display function remains the same\n",
"async def display_summary(url):\n",
"    \"\"\"Render the (async) summary of `url` as Markdown in the notebook.\"\"\"\n",
"    display(Markdown(await summarize(url)))\n",
"\n",
"# Usage\n",
"await display_summary(\"https://openai.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78e0d270",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llms",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,148 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
"metadata": {},
"source": [
"# End of week 1 exercise\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display, update_display\n",
"from dotenv import load_dotenv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"load_dotenv(override=True)\n",
"api_key=os.getenv(\"OPENAI_API_KEY\")\n",
"if not api_key.startswith(\"sk-proj-\") and len(api_key)<10:\n",
" print(\"api key not foud\")\n",
"else:\n",
" print(\"api found and is ok\")\n",
"\n",
"openai=OpenAI()\n",
"print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
"metadata": {},
"outputs": [],
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"question = \"\"\"\n",
"Please explain what this code does and why:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
"metadata": {},
"outputs": [],
"source": [
"# Get gpt-4o-mini to answer, with streaming\n",
"messages = [{\"role\":\"system\",\"content\":\"You are a expert Dta Scientist\"}, {\"role\":\"user\",\"content\":question}]\n",
"\n",
"stream = openai.chat.completions.create(\n",
" model = MODEL_GPT,\n",
" messages = messages,\n",
" stream = True\n",
")\n",
"response = \"\"\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
"metadata": {},
"outputs": [],
"source": [
"# Get Llama 3.2 to answer\n",
"import ollama\n",
"\n",
"stream = ollama.chat(model=MODEL_LLAMA, messages=messages, stream=True)\n",
"response = \"\"\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"for chunk in stream:\n",
" response += chunk[\"message\"][\"content\"] or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a573174-779b-4d50-8792-fa0889b37211",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llmenv",
"language": "python",
"name": "llmenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,426 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Welcome to your first assignment!\n",
"\n",
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
]
},
{
"cell_type": "markdown",
"id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">Just before we get to the assignment --</h2>\n",
" <span style=\"color:#f71;\">I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.<br/>\n",
" <a href=\"https://edwarddonner.com/2024/11/13/llm-engineering-resources/\">https://edwarddonner.com/2024/11/13/llm-engineering-resources/</a><br/>\n",
" Please keep this bookmarked, and I'll continue to add more useful links there over time.\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
"metadata": {},
"source": [
"# HOMEWORK EXERCISE ASSIGNMENT\n",
"\n",
"Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n",
"\n",
"You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n",
"\n",
"**Benefits:**\n",
"1. No API charges - open-source\n",
"2. Data doesn't leave your box\n",
"\n",
"**Disadvantages:**\n",
"1. Significantly less power than Frontier Model\n",
"\n",
"## Recap on installation of Ollama\n",
"\n",
"Simply visit [ollama.com](https://ollama.com) and install!\n",
"\n",
"Once complete, the ollama server should already be running locally. \n",
"If you visit: \n",
"[http://localhost:11434/](http://localhost:11434/)\n",
"\n",
"You should see the message `Ollama is running`. \n",
"\n",
"If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n",
"And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n",
"Then try [http://localhost:11434/](http://localhost:11434/) again.\n",
"\n",
"If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
"metadata": {},
"outputs": [],
"source": [
"# Create a messages list using the same format that we used for OpenAI\n",
"\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
"metadata": {},
"outputs": [],
"source": [
"# Let's just make sure the model is loaded\n",
"\n",
"!ollama pull llama3.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
"metadata": {},
"outputs": [],
"source": [
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
"# And if none of that works - contact me!\n",
"\n",
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
"metadata": {},
"source": [
"# Introducing the ollama package\n",
"\n",
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
"\n",
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
"metadata": {},
"source": [
"## Alternative approach - using OpenAI python library to connect to Ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
"metadata": {},
"outputs": [],
"source": [
"# There's actually an alternative approach that some people might prefer\n",
"# You can use the OpenAI client python library to call Ollama:\n",
"\n",
"from openai import OpenAI\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44",
"metadata": {},
"source": [
"## Are you confused about why that works?\n",
"\n",
"It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n",
"\n",
"Here's the scoop:\n",
"\n",
"The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n",
"\n",
"When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n",
"\n",
"Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n",
"\n",
"OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n",
"\n",
"So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n",
"And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n",
"\n",
"And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n",
"\n",
"That's it!\n",
"\n",
"So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n",
"Then this will make the same endpoint calls, but to Ollama instead of OpenAI."
]
},
{
"cell_type": "markdown",
"id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90",
"metadata": {},
"source": [
"## Also trying the amazing reasoning model DeepSeek\n",
"\n",
"Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. \n",
    "This is actually a 1.5B variant of Qwen that has been fine-tuned using synthetic data generated by DeepSeek R1.\n",
"\n",
"Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d",
"metadata": {},
"outputs": [],
"source": [
"!ollama pull deepseek-r1:1.5b"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d3d554b-e00d-4c08-9300-45e073950a76",
"metadata": {},
"outputs": [],
"source": [
"# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside <think> tags, followed by some decent definitions\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=\"deepseek-r1:1.5b\",\n",
" messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
"metadata": {},
"source": [
"# NOW the exercise for you\n",
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43ef4b92-53e1-4af2-af3f-726812f4265c",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"#from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"#from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97d45733-394e-493e-a92b-1475876d9028",
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6a40f9c5-1b14-42f9-9319-6a66e58e03f2",
"metadata": {},
"outputs": [],
"source": [
"webpage = Website(\"https://www.pleasurewebsite.com\")\n",
"print(webpage.title)\n",
"print(webpage.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a72a005d-43de-4ae5-b427-99a8fcb6065c",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0e4f95f-0ccf-4027-9457-5c973cd17702",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ceae6073-a085-49ce-ad44-39e46d8e6934",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d53b26b-308c-470c-a0a9-9edb887aed6d",
"metadata": {},
"outputs": [],
"source": [
"messages=messages_for(webpage)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"MODEL = \"llama3.2\"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llmenv",
"language": "python",
"name": "llmenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,351 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "06cf3063-9f3e-4551-a0d5-f08d9cabb927",
"metadata": {},
"source": [
"# Triangular agent conversation\n",
"\n",
    "## GPT (Hamlet), Llama (Falstaff), Gemini (Iago):"
]
},
{
"cell_type": "markdown",
"id": "3637910d-2c6f-4f19-b1fb-2f916d23f9ac",
"metadata": {},
"source": [
    "### Created a 3-way, bringing Gemini into the conversation.\n",
"### Replacing one of the models with an open source model running with Ollama."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8e0c1bd-a159-475b-9cdc-e219a7633355",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from IPython.display import Markdown, display, update_display\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3ad57ad-46a8-460e-9cb3-67a890093536",
"metadata": {},
"outputs": [],
"source": [
"import google.generativeai"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f531c14-5743-4a5b-83d9-cb5863ca2ddf",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"# Print the key prefixes to help with any debugging\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
"\n",
"if openai_api_key:\n",
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
"else:\n",
" print(\"OpenAI API Key not set\")\n",
"\n",
"if google_api_key:\n",
" print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
"else:\n",
" print(\"Google API Key not set\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d5150ee-3858-4921-bce6-2eecfb96bc75",
"metadata": {},
"outputs": [],
"source": [
"# Connect to OpenAI\n",
"\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11381fd8-5099-41e8-a1d7-6787dea56e43",
"metadata": {},
"outputs": [],
"source": [
"google.generativeai.configure()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1766d20-54b6-4f76-96c5-c338ae7073c9",
"metadata": {},
"outputs": [],
"source": [
"gpt_model = \"gpt-4o-mini\"\n",
"llama_model = \"llama3.2\"\n",
"gemini_model = 'gemini-2.0-flash'\n",
"\n",
"gpt_system = \"You are playing part of Hamlet. he is philosopher, probes Iago with a mixture of suspicion\\\n",
"and intellectual curiosity, seeking to unearth the origins of his deceit.\\\n",
"Is malice born of scorn, envy, or some deeper void? Hamlets introspective nature\\\n",
"drives him to question whether Iagos actions reveal a truth about humanity itself.\\\n",
"You will respond as Shakespear's Hamlet will do.\"\n",
"\n",
"llama_system = \"You are acting part of Falstaff who attempts to lighten the mood with his jokes and observations,\\\n",
"potentially clashing with Hamlet's melancholic nature.You respond as Shakespear's Falstaff do.\"\n",
"\n",
"gemini_system = \"You are acting part of Iago, subtly trying to manipulate both Hamlet and Falstaff\\\n",
"to his own advantage, testing their weaknesses and exploiting their flaws. You respond like Iago\"\n",
"\n",
"gpt_messages = [\"Hi there\"]\n",
"llama_messages = [\"Hi\"]\n",
"gemini_messages = [\"Hello\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "806a0506-dac8-4bad-ac08-31f350256b58",
"metadata": {},
"outputs": [],
"source": [
"def call_gpt():\n",
" messages = [{\"role\": \"system\", \"content\": gpt_system}]\n",
" for gpt, claude, gemini in zip(gpt_messages, llama_messages, gemini_messages):\n",
" messages.append({\"role\": \"assistant\", \"content\": gpt})\n",
" messages.append({\"role\": \"user\", \"content\": claude})\n",
" messages.append({\"role\": \"user\", \"content\": gemini})\n",
" completion = openai.chat.completions.create(\n",
" model=gpt_model,\n",
" messages=messages\n",
" )\n",
" return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43674885-ede7-48bf-bee4-467454f3e96a",
"metadata": {},
"outputs": [],
"source": [
"def call_llama():\n",
" messages = []\n",
" for gpt, llama, gemini in zip(gpt_messages, llama_messages, gemini_messages):\n",
" messages.append({\"role\": \"user\", \"content\": gpt})\n",
" messages.append({\"role\": \"assistant\", \"content\": llama})\n",
" messages.append({\"role\": \"user\", \"content\": gemini})\n",
" messages.append({\"role\": \"user\", \"content\": gpt_messages[-1]})\n",
" response = ollama.chat(model=llama_model, messages=messages)\n",
"\n",
" \n",
" return response['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03d34769-b339-4c4b-8c60-69494c39d725",
"metadata": {},
"outputs": [],
"source": [
"#import google.generativeai as genai\n",
"\n",
"# Make sure you configure the API key first:\n",
"#genai.configure(api_key=\"YOUR_API_KEY\")\n",
"\n",
"def call_gemini():\n",
" gemini_messages = []\n",
" \n",
" # Format the history for Gemini\n",
" for gpt, llama, gemini_message in zip(gpt_messages, llama_messages, gemini_messages):\n",
" gemini_messages.append({\"role\": \"user\", \"parts\": [gpt]}) # Hamlet speaks\n",
" gemini_messages.append({\"role\": \"model\", \"parts\": [llama]}) # Falstaff responds\n",
" gemini_messages.append({\"role\": \"model\", \"parts\": [gemini_message]}) # Iago responds\n",
"\n",
" # Add latest user input if needed (optional)\n",
" gemini_messages.append({\"role\": \"user\", \"parts\": [llama_messages[-1]]})\n",
"\n",
" # Initialize the model with the correct system instruction\n",
" gemini = google.generativeai.GenerativeModel(\n",
" #model_name='gemini-1.5-flash', # Or 'gemini-pro'\n",
" model_name = gemini_model,\n",
" system_instruction=gemini_system\n",
" )\n",
"\n",
" response = gemini.generate_content(gemini_messages)\n",
" return response.text\n",
"#print(response.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93fc8253-67cb-4ea4-aff7-097b2a222793",
"metadata": {},
"outputs": [],
"source": [
"gpt_messages = [\"Hi there\"]\n",
"llama_messages = [\"Hi\"]\n",
"gemini_messages = [\"Hello\"]\n",
"\n",
"print(f\"Hamlet:\\n{gpt_messages[0]}\\n\")\n",
"print(f\"Falstaff:\\n{llama_messages[0]}\\n\")\n",
"print(f\"Iago:\\n{gemini_messages[0]}\\n\")\n",
"\n",
"for i in range(3):\n",
" gpt_next = call_gpt()\n",
" print(f\"GPT:\\n{gpt_next}\\n\")\n",
" gpt_messages.append(gpt_next)\n",
" \n",
" llama_next = call_llama()\n",
" print(f\"Llama:\\n{llama_next}\\n\")\n",
" llama_messages.append(llama_next)\n",
"\n",
" gemini_next = call_gemini()\n",
" print(f\"Gemini:\\n{gemini_next}\\n\")\n",
" llama_messages.append(gemini_next)"
]
},
{
"cell_type": "markdown",
"id": "bca66ffc-9dc1-4384-880c-210889f5d0ac",
"metadata": {},
"source": [
"## Conversation between gpt-4.0-mini and llama3.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c23224f6-7008-44ed-a57f-718975f4e291",
"metadata": {},
"outputs": [],
"source": [
    "# Let's make a conversation between GPT-4o-mini and Llama 3.2 (running locally via Ollama)\n",
    "# We're using a cheap GPT model and a local model so the costs will be minimal\n",
"\n",
"gpt_model = \"gpt-4o-mini\"\n",
"llama_model = \"llama3.2\"\n",
"\n",
"gpt_system = \"You are a tapori from mumbai who is very optimistic; \\\n",
"you alway look at the brighter part of the situation and you always ready to take act to win way.\"\n",
"\n",
"llama_system = \"You are a Jaat from Haryana. You try to express with hindi poems \\\n",
"to agree with other person and or find common ground. If the other person is optimistic, \\\n",
"you respond in poetic way and keep chatting.\"\n",
"\n",
"gpt_messages = [\"Hi there\"]\n",
"llama_messages = [\"Hi\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d704bbb-f22b-400d-a695-efbd02b26548",
"metadata": {},
"outputs": [],
"source": [
"def call_gpt():\n",
" messages = [{\"role\": \"system\", \"content\": gpt_system}]\n",
" for gpt, llama in zip(gpt_messages, llama_messages):\n",
" messages.append({\"role\": \"assistant\", \"content\": gpt})\n",
" messages.append({\"role\": \"user\", \"content\": llama})\n",
" completion = openai.chat.completions.create(\n",
" model=gpt_model,\n",
" messages=messages\n",
" )\n",
" return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "385ccec8-de59-4e42-9616-3f5c9a05589c",
"metadata": {},
"outputs": [],
"source": [
"def call_llama():\n",
" messages = []\n",
" for gpt, llama_message in zip(gpt_messages, llama_messages):\n",
" messages.append({\"role\": \"user\", \"content\": gpt})\n",
" messages.append({\"role\": \"assistant\", \"content\": llama_message})\n",
" messages.append({\"role\": \"user\", \"content\": gpt_messages[-1]})\n",
" response = ollama.chat(model=llama_model, messages=messages)\n",
"\n",
" \n",
" return response['message']['content']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70b5481b-455e-4275-80d3-0afe0fabcb0f",
"metadata": {},
"outputs": [],
"source": [
"gpt_messages = [\"Hi there\"]\n",
"llama_messages = [\"Hi\"]\n",
"\n",
"print(f\"GPT:\\n{gpt_messages[0]}\\n\")\n",
"print(f\"Llama:\\n{llama_messages[0]}\\n\")\n",
"\n",
"for i in range(3):\n",
" gpt_next = call_gpt()\n",
" print(f\"GPT:\\n{gpt_next}\\n\")\n",
" gpt_messages.append(gpt_next)\n",
" \n",
" llama_next = call_llama()\n",
" print(f\"Llama:\\n{llama_next}\\n\")\n",
" llama_messages.append(llama_next)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f8d734b-57e5-427d-bcb1-7956fc58a348",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llmenv",
"language": "python",
"name": "llmenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,37 @@
# LLM-Powered Dataset Synthesizer: LLaMA 3 + Gradio Demo
This interactive demo showcases a synthetic dataset generation pipeline powered by Meta's LLaMA 3.1 8B-Instruct model, running in 4-bit quantized mode. Users can input natural language prompts describing the structure and logic of a desired dataset, and the model will generate tabular data accordingly.
## ✨ Description
Modern LLMs are capable of reasoning over structured data formats and generating realistic, constrained datasets. This demo leverages the LLaMA 3.1 instruct model, combined with prompt engineering, to generate high-quality synthetic tabular data from plain-language descriptions.
Key components:
- **LLaMA 3.1 8B-Instruct** via Hugging Face Transformers
- **4-bit quantized loading** with `bitsandbytes` for memory efficiency
- **Custom prompt framework** for schema + value constraints
- **Interactive interface** built with Gradio for user-friendly data generation
## 🚀 Functionality
With this tool, you can:
- Generate synthetic datasets by describing the column names, data types, value logic, and number of rows
- Apply constraints based on age, gender, matching conditions, and more (e.g., “females over 40; males under 40”)
- Preview the raw model output or extract structured JSON/tabular results
- Interactively explore and copy generated datasets from the Gradio UI
## 🛠️ Under the Hood
- The model prompt template includes both a **system message** and user instruction
- Output is parsed to extract valid JSON objects
- The generated data is displayed in the Gradio interface and downloadable as CSV
## 📦 Requirements
- Python (Colab recommended)
- `transformers`, `bitsandbytes`, `accelerate`, `gradio`, `torch`
- Hugging Face access token with permission to load LLaMA 3.1
---
Ready to generate smart synthetic datasets with just a sentence? Try it!

File diff suppressed because it is too large Load Diff

View File

@@ -44,3 +44,4 @@ dependencies:
- twilio
- pydub
- protobuf==3.20.2
- wandb

View File

@@ -36,3 +36,4 @@ speedtest-cli
sentence_transformers
feedparser
protobuf==3.20.2
wandb

View File

@@ -0,0 +1,162 @@
# AI Property Rental Assistant
A Python tool that scrapes UK property rental listings and uses OpenAI's GPT-4o-mini to provide personalized property recommendations based on your requirements.
## What It Does
- Scrapes property listings from OnTheMarket.com
- Uses AI to analyze properties against your specific needs
- Provides smart recommendations with reasons why properties match
- Works for any UK location (currently configured for Durham)
## Quick Start
### Prerequisites
- Python 3.7+
- OpenAI API key ([Get one here](https://platform.openai.com/api-keys))
### Installation
1. **Install required packages:**
```bash
pip install requests beautifulsoup4 openai python-dotenv ipython
```
2. **Set up your API key:**
Create a `.env` file in the same directory as your script:
```
OPENAI_API_KEY=your_openai_api_key_here
```
3. **Run the script:**
```bash
python your_script_name.py
```
## How to Use
### Basic Usage
The script is pre-configured to search for student housing in Durham. Just run it and you'll get AI-powered recommendations!
### Customizing Your Search
**Change the location:**
```python
website_url = "https://www.onthemarket.com/to-rent/property/manchester/"
```
**Update your requirements:**
```python
user_needs = "I'm a young professional looking for a 1-bedroom flat in Manchester under £1,000/month"
```
### Example Requirements You Can Use:
- `"Student looking for shared accommodation under £600/month"`
- `"Family needing 3-bedroom house with garden under £1,500/month"`
- `"Professional couple wanting modern 2-bed apartment near city center"`
## Configuration
### Supported Cities
Replace `durham` in the URL with any UK city:
- `london` - London properties
- `manchester` - Manchester properties
- `birmingham` - Birmingham properties
- `leeds` - Leeds properties
- `bristol` - Bristol properties
### AI Behavior
The system prompt is configured for UK rentals but you can modify it in the `system_prompt` variable to:
- Focus on specific property types
- Emphasize certain features (parking, garden, etc.)
- Target different tenant types (students, families, professionals)
## Example Output
```
Website Title: Properties to rent in Durham - OnTheMarket
Content Length: 15847 characters
==================================================
RENTAL RECOMMENDATIONS:
==================================================
# Property Recommendations for Durham
Based on your requirements for a 2-bedroom student property under £2,000/month, here are my top recommendations:
## 1. **Student House on North Road** - £1,600/month
- **Bedrooms:** 2
- **Perfect because:** Well within budget, popular student area
- **Features:** Close to university, furnished, bills included
## 2. **Modern Apartment City Centre** - £1,400/month
- **Bedrooms:** 2
- **Perfect because:** Great location, modern amenities
- **Features:** Parking space, balcony, near shops
```
## Requirements
Create a `requirements.txt` file:
```
requests>=2.28.0
beautifulsoup4>=4.11.0
openai>=1.0.0
python-dotenv>=0.19.0
ipython>=8.0.0
```
Install with: `pip install -r requirements.txt`
## Important Notes
### API Costs
- Uses GPT-4o-mini model (very affordable - ~$0.001 per request)
- Monitor usage at: https://platform.openai.com/usage
### Rate Limits
- Free OpenAI accounts: 3 requests per minute
- The script makes 1 request per run
## How It Works
1. **Web Scraping:** Downloads the property listing page
2. **Text Extraction:** Cleans HTML and extracts property information
3. **AI Analysis:** Sends your requirements + property data to GPT-4
4. **Smart Recommendations:** AI filters and ranks properties with explanations
## Troubleshooting
**"No API key found"**
- Make sure `.env` file exists in the same folder as your script
- Check the API key has no extra spaces
- Verify it starts with `sk-proj-`
**"Error fetching website"**
- Check your internet connection
- Try a different city URL
- Some websites may temporarily block requests
**No good recommendations**
- Try adjusting your budget or requirements
- Check if the website loaded properly (look at content length)
- Try a different city with more properties
## Possible Improvements
- Make it interactive (ask for user input)
- Support multiple property websites
- Add price tracking over time
- Include property images in analysis
- Create a simple web interface
## Disclaimer
This tool is for educational purposes. Always verify property information directly with landlords or estate agents. Respect website terms of service.
---
**Need help?** Check that your `.env` file is set up correctly and you have an active internet connection. The script will tell you if there are any issues with your API key!

View File

@@ -0,0 +1,294 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0993b44c-58f3-4d7d-ac31-80871a867040",
"metadata": {},
"source": [
"# AI Property Rental Assistant for Durham\n",
"This notebook creates an intelligent property rental assistant that:\n",
"1. Scrapes rental property listings from OnTheMarket.com\n",
"2. Uses OpenAI's GPT-4o-mini to analyze and recommend properties based on user preferences\n",
"3. Provides formatted recommendations in markdown for easy reading\n",
"\n",
"Purpose: Help students and professionals find suitable rental properties in Durham, UK"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f3fa597-bac5-496f-b0c6-ac1cb524062d",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfa715c4-81d4-4f1e-87d8-6cf7fa17db71",
"metadata": {},
"outputs": [],
"source": [
"# =====================================\n",
"# STEP 1: ENVIRONMENT SETUP & API KEYS\n",
"# =====================================\n",
"\n",
"# Load environment variables from .env file\n",
"# Make sure you have a .env file with: OPENAI_API_KEY=your_key_here\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Validate the OpenAI API key format and existence\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n",
"\n",
"# Initialize OpenAI client\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7e44572-1cda-42d2-a6ff-45f462fd436f",
"metadata": {},
"outputs": [],
"source": [
"# =====================================\n",
"# STEP 2: WEB SCRAPING SETUP\n",
"# =====================================\n",
"\n",
"# HTTP headers to mimic a real browser request\n",
"# Many websites block requests without proper headers\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A class to represent and scrape content from a webpage.\n",
" \n",
" This class handles:\n",
" - Fetching HTML content from a URL\n",
" - Parsing HTML with BeautifulSoup\n",
" - Extracting clean text content (removing scripts, styles, etc.)\n",
" - Error handling for failed requests\n",
" \n",
" Attributes:\n",
" url (str): The URL of the website\n",
" title (str): The page title\n",
" text (str): Clean text content from the page body\n",
" \"\"\"\n",
" \n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Initialize Website object by scraping content from the given URL.\n",
" \n",
" Args:\n",
" url (str): The website URL to scrape\n",
" \"\"\"\n",
" self.url = url\n",
" try:\n",
" # Make HTTP request with timeout to prevent hanging\n",
" response = requests.get(url, headers=headers, timeout=10)\n",
" response.raise_for_status() # Raises an HTTPError for bad responses\n",
" \n",
" # Parse HTML content\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" \n",
" # Extract page title\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" \n",
" # Clean up the HTML by removing irrelevant elements\n",
" if soup.body:\n",
" # Remove scripts, styles, images, and input elements\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" \n",
" # Extract clean text with proper line separation\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"No body content found\"\n",
" \n",
" except requests.RequestException as e:\n",
" # Handle network errors gracefully\n",
" print(f\"Error fetching website: {e}\")\n",
" self.title = \"Error loading page\"\n",
" self.text = \"Could not load page content\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a97d9c34-2831-4730-949e-bba1b6ac9bb3",
"metadata": {},
"outputs": [],
"source": [
"# =====================================\n",
"# STEP 3: AI ASSISTANT FUNCTIONS\n",
"# =====================================\n",
"\n",
"def house_renting(system_prompt, user_prompt):\n",
" \"\"\"\n",
" Send prompts to OpenAI's GPT model and get rental recommendations.\n",
" \n",
" This function:\n",
" - Formats the conversation for the AI model\n",
" - Sends requests to GPT-4o-mini (cost-effective model)\n",
" - Returns the AI's response as a string\n",
" \n",
" Args:\n",
" system_prompt (str): Instructions for how the AI should behave\n",
" user_prompt (str): The user's specific request with property data\n",
" \n",
" Returns:\n",
" str: AI-generated rental recommendations in markdown format\n",
" \"\"\"\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]\n",
" \n",
" # Call OpenAI API\n",
" response = openai.chat.completions.create(\n",
" model=\"gpt-4o-mini\", # Cost-effective model, good for this task\n",
" messages=messages,\n",
" )\n",
" \n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d0c4b96-b907-45ed-8a4d-a67d8f7e4f33",
"metadata": {},
"outputs": [],
"source": [
"# =====================================\n",
"# STEP 4: AI SYSTEM CONFIGURATION\n",
"# =====================================\n",
"\n",
"# Define how the AI assistant should behave\n",
"# This is crucial for getting consistent, helpful responses\n",
"system_prompt = \"\"\"\n",
"You are a helpful real estate assistant specializing in UK property rentals. Your job is to guide users in finding houses to rent, especially in Durham. Follow these rules:\n",
"\n",
"1. Always ask clarifying questions if user input is vague. Determine location, budget, number of bedrooms, and tenant type (e.g. student, family, professional).\n",
"2. Use structured data provided from the website (like property listings) to identify relevant options.\n",
"3. If listings are provided, filter and rank them based on the user's preferences.\n",
"4. Recommend up to 5 top properties with rent price, bedroom count, key features, and location.\n",
"5. Always respond in markdown with clean formatting using headers, bold text, and bullet points.\n",
"6. If no listings match well, provide tips (e.g. \"try adjusting your budget or search radius\").\n",
"7. Stay concise, helpful, and adapt to whether the user is a student, family, couple, or solo tenant.\n",
"\"\"\"\n",
"\n",
"def user_prompt_for_renting(website, user_needs):\n",
" \"\"\"\n",
" Create a formatted prompt that combines user requirements with scraped property data.\n",
" \n",
" This function:\n",
" - Takes user preferences and website content\n",
" - Formats them into a clear prompt for the AI\n",
" - Limits content to first 4000 characters to stay within token limits\n",
" \n",
" Args:\n",
" website (Website): The scraped website object\n",
" user_needs (str): Description of what the user is looking for\n",
" \n",
" Returns:\n",
" str: Formatted prompt ready to send to the AI\n",
" \"\"\"\n",
" user_prompt = f\"\"\"\n",
"I want to rent a house and here's what I'm looking for:\n",
"{user_needs}\n",
"\n",
"Here are the property listings I found on the website titled: \"{website.title}\".\n",
"Please analyze them and recommend the best 3-5 options that match my needs. If none are suitable, tell me why and offer suggestions.\n",
"\n",
"The page content is below:\n",
"{website.text[:4000]}\n",
"\"\"\"\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cecb1f11-060a-4737-828c-e94ae04a42ae",
"metadata": {},
"outputs": [],
"source": [
"# =====================================\n",
"# STEP 5: MAIN EXECUTION\n",
"# =====================================\n",
"\n",
"if __name__ == \"__main__\":\n",
" print(\"Starting AI Property Rental Assistant...\")\n",
" print(\"=\" * 50)\n",
" \n",
" # Configure the property search\n",
" website_url = \"https://www.onthemarket.com/to-rent/property/durham/\"\n",
" print(f\"🔍 Scraping properties from: {website_url}\")\n",
" \n",
" # Scrape the website\n",
" website = Website(website_url)\n",
" \n",
" # Display scraping results\n",
" print(f\"Website Title: {website.title}\")\n",
" print(f\"Content Length: {len(website.text)} characters\")\n",
" print(f\"Successfully scraped property listings\")\n",
" print()\n",
" \n",
" # Define user requirements\n",
" # TODO: Make this interactive by adding input() statements\n",
" user_needs = \"I'm a student looking for a 2-bedroom house in Durham under £2,000/month\"\n",
" print(f\"User Requirements: {user_needs}\")\n",
" print()\n",
" \n",
" # Generate AI prompt\n",
" user_prompt = user_prompt_for_renting(website, user_needs)\n",
" \n",
" # Get AI recommendations\n",
" print(\"Generating AI recommendations...\")\n",
" output = house_renting(system_prompt, user_prompt)\n",
" \n",
" # Display results\n",
" display(Markdown(output))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:llms]",
"language": "python",
"name": "conda-env-llms-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,425 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d5b08506-dc8b-4443-9201-5f1848161363",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ce95b0e-3f98-4cef-b9fe-d963fdeeed2d",
"metadata": {},
"outputs": [],
"source": [
"# run ollama serve in your Anaconda terminal\n",
"MODEL = \"llama3.2\"\n",
"openai = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "106dd65e-90af-4ca8-86b6-23a41840645b",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e30d8128-933b-44cc-81c8-ab4c9d86589a",
"metadata": {},
"outputs": [],
"source": [
"ed = Website(\"https://edwarddonner.com\")\n",
"ed.links"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6957b079-0d96-45f7-a26a-3487510e9b35",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in these examples:\"\n",
"link_system_prompt += \"\"\"example 1:\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}]}\n",
"\n",
"Example 2:\n",
" {\"links\": [{\"type\": \"about page\", \"url\": \"https://great-comps.de/about-me\"},\n",
" {\"type\": \"news page\", \"url\": \"https://great-comps.de/news\"},\n",
" {\"type\": \"case studies page\", \"url\": \"https://great-comps.de/case-studies\"},\n",
" {\"type\": \"workshop page\", \"url\": \"https://great-comps.de/workshop-ai\"}]}\n",
"\n",
"\n",
"Example 3:\n",
" {\"links\": [{\"type\": \"über mich\", \"url\": \"https://wahlen-robbie.at/ueber-mich\"},\n",
" {\"type\": \"aktuelles\", \"url\": \"https://wahlen-robbie.at/neuigkeiten\"},\n",
" {\"type\": \"whitepaper\", \"url\": \"https://wahlen-robbie.at/whitepapers\"},\n",
" {\"type\": \"services\", \"url\": \"https://wahlen-robbie.at/services\"}]}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b97e4068-97ed-4120-beae-c42105e4d59a",
"metadata": {},
"outputs": [],
"source": [
"print(link_system_prompt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e1f601b-2eaf-499d-b6b8-c99050c9d6b3",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bcbfa78-6395-4685-b92c-22d592050fd7",
"metadata": {},
"outputs": [],
"source": [
"print(get_links_user_prompt(ed))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a29aca19-ca13-471c-a4b4-5abbfa813f69",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74a827a0-2782-4ae5-b210-4a242a8b4cc2",
"metadata": {},
"outputs": [],
"source": [
"# Getting the links for HuggingFace\n",
"huggingface = Website(\"https://huggingface.co\")\n",
"huggingface.links"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3d583e2-dcc4-40cc-9b28-1e8dbf402924",
"metadata": {},
"outputs": [],
"source": [
"get_links(\"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85a5b6e2-e7ef-44a9-bc7f-59ede71037b5",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" #print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5099bd14-076d-4745-baf3-dac08d8e5ab2",
"metadata": {},
"outputs": [],
"source": [
"print(get_all_details(\"https://edwarddonner.com\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b863a55-f86c-4e3f-8a79-94e24c1a8cf2",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\"\n",
"\n",
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ab83d92-d36b-4ce0-8bcc-5bb4c2f8ff23",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd909e0b-1312-4ce2-a553-821e795d7572",
"metadata": {},
"outputs": [],
"source": [
"get_brochure_user_prompt(\"ed\", \"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e44de579-4a1a-4e6a-a510-20ea3e4b8d46",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e093444a-9407-42ae-924a-145730591a39",
"metadata": {},
"outputs": [],
"source": [
"create_brochure(\"ed\", \"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fbc1a523-4c5c-4571-8541-456e5feed185",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure_translated(company_name, url, language):\n",
" system_prompt = f\"\"\"You are an expert professional translator. Translate the following brochure text into {language}.\n",
" Make sure to translate into a idiomatic {language}, matching the users language's natural structure, wording and expressions,\n",
" so it can't be recognised as a translation.\n",
" Be sure to also maintain an appropriate tone, Output the translated brochure in Markdown format.\"\"\"\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" \n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ff2ddc4-f13c-44b2-8043-d66f36b0954c",
"metadata": {},
"outputs": [],
"source": [
"create_brochure_translated(\"ed\", \"https://edwarddonner.com\", \"French\")"
]
},
{
"cell_type": "markdown",
"id": "61eaaab7-0b47-4b29-82d4-75d474ad8d18",
"metadata": {},
"source": [
"## Finally - a minor improvement\n",
"\n",
"With a small adjustment, we can change this so that the results stream back from OpenAI,\n",
"with the familiar typewriter animation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51db0e49-f261-4137-aabe-92dd601f7725",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure(company_name, url):\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09f18581-b870-4952-8430-217afaf7a83e",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure_translated(company_name, url, language):\n",
" system_prompt = f\"\"\"You are an expert professional translator. \n",
" Translate the following brochure text into {language}.\n",
" Make sure to translate into idiomatic {language}, matching the natural structure, wording, and expressions.\n",
" Maintain an appropriate tone. Output the translated brochure in Markdown format.\"\"\"\n",
"\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
"\n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" # clean up formatting glitches while streaming\n",
" response = response.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56bf0ae3-ee9d-4a72-9cd6-edcac67ceb6d",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure_translated(\"ed\", \"https://edwarddonner.com\", \"Spanish\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "606e9c73-50fe-46b9-8df3-ae2246c00a3e",
"metadata": {},
"source": [
"# Business Use Case - LLM based Resume Upgrader"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "919f6546-80ec-4d4c-8a80-00228f50e4a0",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "markdown",
"id": "b2f5b02c-f782-4578-8a91-07891c39ceb0",
"metadata": {},
"source": [
"steps to perform\n",
"-> load API key from env file\n",
"-> create a function to call llm api\n",
"-> create messages for system prompt and user prompt\n",
"-> display the llm output"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31aaa20e-4996-43cb-b43a-a1aef80fd391",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"# error handling\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92f65c91-ca7f-47e6-9fd7-d63b278ba264",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98fc7bac-07c8-4801-9225-8f843837f3c2",
"metadata": {},
"outputs": [],
"source": [
"# system prompt\n",
"\n",
"system_prompt = \"\"\"You are a helpful resume editor assistant that provides required assistance in changing a resume to match the given job description role \\\n",
"You are given a resume and job description, your job is to understand the resume and job description to suggest up to 6 missing keywords in the resume. Then you have to \n",
"suggest how the user can improve their resume by giving up to 3 example sentences using the suggested keywords to fit into their resume.\n",
"by using the following structure provide your response \\\n",
"Structure:\n",
"Job role : [Job Role]:\n",
"Candidate Name : [Candidate Name]\n",
"Missing Key words in Resume Based on Given job description:\n",
" - [] Missing key words\n",
" -[] Missing key words\n",
"\n",
"\n",
"Suggestion:\n",
" - [] # write a sentence including the key words to put them in the resume\n",
" - [] # write a sentence including the key words to put them in the resume\n",
"\n",
"Guidelines:\n",
"- give proper keyword suggestions which are essential for the job function. Do not give any unnecessary suggestions\n",
"- Keep the suggested sentences less than 50 words\n",
"- \n",
"\"\"\"\n",
"user_prompt = f'Give me suggestions on how to improve my resume and for the given job description '\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d9c40b5-8e27-41b9-8b88-2c83e7d2b3ec",
"metadata": {},
"outputs": [],
"source": [
"# call openai api\n",
"def resume_upgrader(resume:str, job_description:str):\n",
" user_prompt = f'Give me suggestions on how to improve my resume {resume} and for the given job description {job_description}'\n",
" messages = [\n",
" {'role': 'system', 'content': system_prompt},\n",
" {'role': 'user', 'content': user_prompt}\n",
" ]\n",
" try:\n",
" \n",
" response = openai.chat.completions.create(model =\"gpt-4o-mini\", messages = messages)\n",
" return response.choices[0].message.content\n",
" except:\n",
" print('got an error while getting the response from the API')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5aa29465-c119-4178-90f1-3ebdc9eeb11a",
"metadata": {},
"outputs": [],
"source": [
"def print_api_response(response_markdown):\n",
" \"\"\"Print the markdown response\"\"\"\n",
" display(Markdown(response_markdown))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a92034-6722-4e78-a901-b4ef2b9cbb84",
"metadata": {},
"outputs": [],
"source": [
"resume = input(\"Paste your resume in here\")\n",
"job_description = input(\"Paste your job description here\")\n",
"response = resume_upgrader(resume, job_description)\n",
"print_api_response(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0be536f-e890-473f-8c68-767bc0e3b47c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,189 @@
# AI Property Rental Assistant
An intelligent property rental assistant Jupyter notebook that scrapes real estate listings from OnTheMarket and uses a local LLM (DeepSeek R1) to analyze and recommend properties based on user requirements.
## Features
- **Web Scraping**: Automatically fetches property listings from OnTheMarket
- **AI-Powered Analysis**: Uses DeepSeek R1 model via Ollama for intelligent recommendations
- **Personalized Recommendations**: Filters and ranks properties based on:
- Budget constraints
- Number of bedrooms
- Tenant type (student, family, professional)
- Location preferences
- **Clean Output**: Returns formatted markdown with top 3-5 property recommendations
- **Smart Filtering**: Handles cases where no suitable properties are found with helpful suggestions
## Prerequisites
- Python 3.7+
- Ollama installed and running locally
- DeepSeek R1 14B model pulled in Ollama
## Installation
1. **Clone the repository**
```bash
git clone <your-repo-url>
cd property-rental-assistant
```
2. **Install required Python packages**
```bash
pip install requests beautifulsoup4 ollama ipython jupyter
```
3. **Install and setup Ollama**
```bash
# Install Ollama (macOS/Linux)
curl -fsSL https://ollama.ai/install.sh | sh
# For Windows, download from: https://ollama.ai/download
```
4. **Pull the DeepSeek R1 model**
```bash
ollama pull deepseek-r1:14b
```
5. **Start Ollama server**
```bash
ollama serve
```
## Usage
### Running the Notebook
1. **Start Jupyter Notebook**
```bash
jupyter notebook
```
2. **Open the notebook**
Navigate to `property_rental_assistant.ipynb` in the Jupyter interface
3. **Run all cells**
Click `Cell` → `Run All` or use `Shift + Enter` to run cells individually
### Customizing Search Parameters
Modify the `user_needs` variable in the notebook:
```python
user_needs = "I'm a student looking for a 2-bedroom house in Durham under £2,000/month"
```
Other examples:
- `"Family of 4 looking for 3-bedroom house with garden in Durham, budget £2,500/month"`
- `"Professional couple seeking modern 1-bed apartment near city center, max £1,500/month"`
- `"Student group needs 4-bedroom house near Durham University, £600/month per person"`
### Changing the Property Website
Update the `website_url` variable in the notebook:
```python
website_url = "https://www.onthemarket.com/to-rent/property/durham/"
```
## Architecture
```
┌─────────────────┐ ┌──────────────┐ ┌─────────────┐
│ OnTheMarket │────▶│ Web Scraper │────▶│ Ollama │
│ Website │ │ (BeautifulSoup)│ │ (DeepSeek R1)│
└─────────────────┘ └──────────────┘ └─────────────┘
┌─────────────────────────────────┐
│ AI-Generated Recommendations │
│ • Top 5 matching properties │
│ • Filtered by requirements │
│ • Markdown formatted output │
└─────────────────────────────────┘
```
## Project Structure
```
property-rental-assistant/
├── property_rental_assistant.ipynb # Main Jupyter notebook
└── README.md # This file
```
## 🔧 Configuration
### Ollama API Settings
```python
OLLAMA_API = "http://localhost:11434/api/chat" # Default Ollama endpoint
MODEL = "deepseek-r1:14b" # Model to use
```
### Web Scraping Settings
```python
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
timeout = 10 # Request timeout in seconds
```
### Content Limits
```python
website.text[:4000] # Truncate content to 4000 chars for token limits
```
## How It Works
1. **Web Scraping**: The `Website` class fetches and parses HTML content from the property listing URL
2. **Content Cleaning**: Removes scripts, styles, and images to extract clean text
3. **Prompt Engineering**: Combines system prompt with user requirements and scraped data
4. **LLM Analysis**: Sends the prompt to DeepSeek R1 via Ollama API
5. **Recommendation Generation**: The AI analyzes listings and returns top matches in markdown format
## 🛠️ Troubleshooting
### Ollama Connection Error
```
Error communicating with Ollama: [Errno 111] Connection refused
```
**Solution**: Ensure Ollama is running with `ollama serve`
### Model Not Found
```
Error: model 'deepseek-r1:14b' not found
```
**Solution**: Pull the model with `ollama pull deepseek-r1:14b`
### Web Scraping Blocked
```
Error fetching website: 403 Forbidden
```
**Solution**: The website may be blocking automated requests. Try:
- Updating the User-Agent string
- Adding delays between requests
- Using a proxy or VPN
### Insufficient Property Data
If recommendations are poor quality, the scraper may not be capturing listing details properly. Check:
- The website structure hasn't changed
- The content truncation limit (4000 chars) isn't too restrictive
## Future Enhancements
- [ ] Support multiple property websites (Rightmove, Zoopla, SpareRoom)
- [ ] Interactive CLI for dynamic user input
- [ ] Property image analysis
- [ ] Save search history and favorite properties
- [ ] Email notifications for new matching properties
- [ ] Price trend analysis
- [ ] Commute time calculations to specified locations
- [ ] Multi-language support
- [ ] Web interface with Flask/FastAPI
- [ ] Docker containerization
## Acknowledgments
- [Ollama](https://ollama.ai/) for local LLM hosting
- [DeepSeek](https://www.deepseek.com/) for the R1 model
- [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for web scraping
- [OnTheMarket](https://www.onthemarket.com/) for property data

View File

@@ -0,0 +1,217 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "57112e5c-7b0f-4ba7-9022-ae21e8ac0f42",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b71a051-fc0e-46a9-8b1b-b58f685e800d",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"deepseek-r1:14b\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ed3be9dc-d459-46ac-a8eb-f9b932c4302f",
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" def __init__(self, url):\n",
" self.url = url\n",
" try:\n",
" response = requests.get(url, headers=headers, timeout=10)\n",
" response.raise_for_status()\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"No body content found\"\n",
" except requests.RequestException as e:\n",
" print(f\"Error fetching website: {e}\")\n",
" self.title = \"Error loading page\"\n",
" self.text = \"Could not load page content\""
]
},
{
"cell_type": "markdown",
"id": "17ea76f8-38d9-40b9-8aba-eb957d690a0d",
"metadata": {},
"source": [
"## Without Ollama package"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a6fd698-8e59-4cd7-bb53-b9375e50f899",
"metadata": {},
"outputs": [],
"source": [
"def house_renting(system_prompt, user_prompt):\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]\n",
" payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }\n",
" response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
" return response.json()['message']['content']"
]
},
{
"cell_type": "markdown",
"id": "c826a52c-d1d3-493a-8b7c-6e75b848b453",
"metadata": {},
"source": [
"## Introducing Ollama package "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "519e27da-eeff-4c1b-a8c6-e680fdf01da2",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"def house_renting_ollama(system_prompt, user_prompt):\n",
" try:\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ]\n",
" response = ollama.chat(model=MODEL, messages=messages)\n",
" return response['message']['content']\n",
" except Exception as e:\n",
" return f\"Error communicating with Ollama: {e}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60e98b28-06d9-4303-b8ca-f7b798244eb4",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"\n",
"You are a helpful real estate assistant specializing in UK property rentals. Your job is to guide users in finding houses to rent, especially in Durham. Follow these rules:\n",
"1. Always ask clarifying questions if user input is vague. Determine location, budget, number of bedrooms, and tenant type (e.g. student, family, professional).\n",
"2. Use structured data provided from the website (like property listings) to identify relevant options.\n",
"3. If listings are provided, filter and rank them based on the user's preferences.\n",
"4. Recommend up to 5 top properties with rent price, bedroom count, key features, and location.\n",
"5. Always respond in markdown with clean formatting using headers, bold text, and bullet points.\n",
"6. If no listings match well, provide tips (e.g. \"try adjusting your budget or search radius\").\n",
"7. Stay concise, helpful, and adapt to whether the user is a student, family, couple, or solo tenant.\n",
"\"\"\"\n",
"\n",
"def user_prompt_for_renting(website, user_needs):\n",
" return f\"\"\"\n",
"I want to rent a house and here's what I'm looking for:\n",
"{user_needs}\n",
"\n",
"Here are the property listings I found on the website titled: \"{website.title}\".\n",
"\n",
"Please analyze them and recommend the best 35 options that match my needs. If none are suitable, tell me why and offer suggestions.\n",
"\n",
"The page content is below:\n",
"{website.text[:4000]}\n",
"\"\"\" # content is truncated for token limits"
]
},
{
"cell_type": "markdown",
"id": "ef420f4b-e3d2-4fbd-bf6f-811f2c8536e0",
"metadata": {},
"source": [
"## Ollama Package"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cf128af-4ece-41ab-b353-5c8564c7de1d",
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\": \n",
" print(\"Starting AI Property Rental Assistant...\")\n",
" print(\"=\" * 50)\n",
" \n",
" website_url = \"https://www.onthemarket.com/to-rent/property/durham/\"\n",
" print(f\"🔍 Scraping properties from: {website_url}\")\n",
" \n",
" website = Website(website_url)\n",
" print(f\"Website Title: {website.title}\")\n",
" print(f\"Content Length: {len(website.text)} characters\")\n",
" print(f\"Successfully scraped property listings\\n\")\n",
" \n",
" user_needs = \"I'm a student looking for a 2-bedroom house in Durham under £2,000/month\"\n",
" print(f\"User Requirements: {user_needs}\\n\")\n",
" \n",
" user_prompt = user_prompt_for_renting(website, user_needs)\n",
" print(\"Generating AI recommendations...\")\n",
" \n",
" # Choose which method to use (comment out the one you don't want)\n",
" \n",
" # Method 1: Using ollama Python library\n",
" output = house_renting_ollama(system_prompt, user_prompt)\n",
" \n",
" # Method 2: Using direct API call\n",
" # output = house_renting(system_prompt, user_prompt)\n",
" \n",
" display(Markdown(output))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:llms]",
"language": "python",
"name": "conda-env-llms-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,256 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "e0ab4a60-bc68-446d-ae13-6bd90d54ae44",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"from dotenv import load_dotenv\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "749afaa0-a82e-4783-91fc-f69756075606",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7e760d9c-d899-49e5-8b8f-c202794486cc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far!\n"
]
}
],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8efb8bb3-9be9-404b-aff5-306db64a75e7",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cf677c78-012c-4b86-a76c-be47ed3cb987",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the \\\n",
"the dashboard in a website and provides a short executive summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "970a493a-880a-4206-9609-eee0651aa91f",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nPlease provide a detailed summary of the report for the year in markdown for its user (CFO); \\\n",
"The summary should be in a suitable form which could be sent through a mail for the exective.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2520cdd1-4755-4c87-854f-430e81dbc3fc",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7452990b-352b-43cc-adc6-4307d6d5c1d5",
"metadata": {},
"outputs": [],
"source": [
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7372da3c-f3c7-455b-825e-f54d3b0cee68",
"metadata": {},
"outputs": [],
"source": [
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "be7d57dc-bec1-4771-9d15-d80fd4d3fbb5",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# Executive Summary: Revenue & Profitability Dashboard\n",
"\n",
"**To:** [CFO Name] \n",
"**From:** [Your Name] \n",
"**Date:** [Current Date] \n",
"**Subject:** Yearly Analysis of Revenue & Profitability \n",
"\n",
"---\n",
"\n",
"Dear [CFO Name],\n",
"\n",
"I am pleased to present the Year-over-Year analysis derived from the Revenue & Profitability Dashboard. This dashboard has been designed to provide concise insights into our core financial performance metrics, enabling data-driven decision-making at the executive level.\n",
"\n",
"### Key Metrics Overview:\n",
"- **Revenue**: Comprehensive insights into total revenue across various regions and product categories, indicating sustainable growth patterns.\n",
"- **Profit**: Detailed profitability analysis segmented by customer groups, revealing key opportunities for margin improvement and cost optimization.\n",
"- **Unit Sales**: Analysis of unit sales trends that highlight product performance and demand fluctuations.\n",
"\n",
"### Insights by Segment:\n",
"- **Regional Performance**: Comparative analysis of revenue and profitability by region helps identify areas of growth and those requiring strategic intervention.\n",
"- **Product Performance**: A focused review of individual product lines shows which offerings are driving profitability and where we might consider realignment or innovation.\n",
"\n",
"### Dashboard Features:\n",
"- A **clean and focused layout** reduces cognitive load, allowing for quick assimilation and understanding of critical data points.\n",
"- **Contextual metrics** that align with our overarching business strategy, ensuring that our analysis supports our organizational goals.\n",
"- **Clear comparison points** are established to aid executives in making informed and timely decisions.\n",
"- Insightful details are presented at both product and regional levels, facilitating targeted strategies for improvement.\n",
"\n",
"### Conclusion:\n",
"The integration of design and context in our dashboard framework turns our data into strategic tools, empowering us to make faster and more informed decisions that drive real business impact.\n",
"\n",
"Please feel free to reach out for a more detailed discussion or specific metrics that may interest you.\n",
"\n",
"Best regards,\n",
"\n",
"[Your Name] \n",
"[Your Position]\n",
"\n",
"--- \n",
"\n",
"*Note: For additional inquiries or insights, feel free to follow our updates on LinkedIn or contact me directly.*"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_summary(\"https://community.fabric.microsoft.com/t5/Data-Stories-Gallery/Revenue-amp-Profitability-Dashboard/td-p/4780272\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1aa33cfd-d497-4ab8-abb5-eb4e6030890b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,103 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d7a6bb51",
"metadata": {},
"outputs": [],
"source": [
"# import library\n",
"from openai import OpenAI\n",
"import os\n",
"from dotenv import load_dotenv\n",
"\n",
"# Load your API key from an .env file\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ac4cdf9",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"system_prompt = \"you are a helpful assistant that suggests an appropriate short subject line for an email based on its contents.\"\n",
"\n",
"user_prompt = \"\"\"\n",
"Hi John,\n",
"I hope this email finds you well. I wanted to follow up on our meeting last week regarding the quarterly budget proposal.\n",
"After reviewing the numbers with my team, we've identified some areas where we can reduce costs by approximately 15% without impacting our core operations. This would involve consolidating some vendor contracts and optimizing our software licensing.\n",
"Could we schedule a meeting next week to discuss these findings in detail? I'm available Tuesday through Thursday afternoon.\n",
"Looking forward to hearing from you.\n",
"\n",
"Best regards,\n",
"Sarah\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a77ca09e",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Step 2: Make the messages list\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8404f0fe",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Step 3: Call OpenAI\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a4875f7",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Step 4: Print the result\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,290 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "135717e7",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "29a9e634",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# OPTION 1\n",
"# using openai\n",
"\n",
"# message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"# client = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"not-needed\")\n",
"# response = openai.chat.completions.create(model=`<name of model>`, messages=[{\"role\":\"user\", \"content\":message}])\n",
"# print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "306993ed",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# OPTION 2\n",
"# using Ollama\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"model=\"llama3\"\n",
"response=ollama.chat(model=model,messages=[{\"role\":\"user\",\"content\":message}])\n",
"print(response[\"message\"][\"content\"])\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "856f767b",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4ce558dc",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5e3956f8",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "99d791b4",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5d89b748",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9a97d3e2",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response=ollama.chat(model=model,messages=messages_for(website))\n",
" return(response[\"message\"][\"content\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec13fe0a",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e3ade092",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be2d49e6",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ccbf33b",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae3d0eae",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"display_summary(\"https://anthropic.com\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "9138adfe-71b0-4db2-a08f-dd9e472fdd63",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import boto3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15d71dd6-cc03-485e-8a34-7a33ed5dee0e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "1358921d-173b-4d5d-828c-b6c3726a5eb3",
"metadata": {},
"source": [
"#### Connect to bedrock models"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b3827087-182f-48be-8b59-b2741f8ded44",
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "94c11534-6847-4e4a-b8e4-8066e0cc6aca",
"metadata": {},
"outputs": [],
"source": [
"# Use the Conversation API to send a text message to Amazon Nova.\n",
"\n",
"import boto3\n",
"from botocore.exceptions import ClientError\n",
"\n",
"# Create a Bedrock Runtime client in the AWS Region you want to use.\n",
"client = boto3.client(\"bedrock-runtime\", region_name=\"us-east-1\")\n",
"\n",
"# Set the model ID, e.g., Amazon Nova Lite.\n",
"model_id = \"amazon.nova-lite-v1:0\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a8ad65f-abaa-475c-892c-2e2b4e668f5d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ac20bb00-e93f-4a95-a1de-dd2688bce591",
"metadata": {},
"outputs": [],
"source": [
"# Start a conversation with the user message.\n",
"user_message = \"\"\"\n",
"List the best parks to see in London with number of google ratings and value ie. 4.5 out of 5 etc. \n",
"Give number of ratings and give output in table form\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a29f0055-48c4-4f25-b33f-cde1eaf755c5",
"metadata": {},
"outputs": [],
"source": [
"conversation = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [{\"text\": user_message}],\n",
" }\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e68b2d5-4d43-4b80-8574-d3c847b33661",
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" # Send the message to the model, using a basic inference configuration.\n",
" response = client.converse(\n",
" modelId=model_id,\n",
" messages=conversation,\n",
" inferenceConfig={\"maxTokens\": 512, \"temperature\": 0.5, \"topP\": 0.9},\n",
" )\n",
"\n",
" # Extract and print the response text.\n",
" response_text = response[\"output\"][\"message\"][\"content\"][0][\"text\"]\n",
" print(response_text)\n",
"\n",
"except (ClientError, Exception) as e:\n",
" print(f\"ERROR: Can't invoke '{model_id}'. Reason: {e}\")\n",
" exit(1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ed16ee7-3f09-4780-8dfc-d1c5f3cffdbe",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f8c7a18-0907-430d-bfe7-86ecb8933bfd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2183994b-cde5-45b0-b18b-37be3277d73b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7bb9010e-48a8-491e-a2a9-1a8dacc26f87",
"metadata": {},
"source": [
"# Movie Suggestion using Ollama Running Locally\n",
"\n",
"#### Takes the user input like languages and Genre and suggests Top 10 Movies of the selected attributes.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad049302-dce8-4a0a-88ab-e485ac15fbe4",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from IPython.display import display, Markdown\n",
"\n",
"def get_movie_recommendations(language, genre, top_n=10, model='llama3.2'):\n",
" api_url = \"http://localhost:11434/api/generate\"\n",
" prompt = (\n",
" f\"Recommend {top_n} well-rated {language} movies from the {genre} genre. \"\n",
" \"For each movie, provide the name and a 1-2 sentence preview of its story. \"\n",
" \"Return the results as a Markdown table with columns: Title, Short Summary.\"\n",
" )\n",
" data = {\n",
" \"model\": model,\n",
" \"prompt\": prompt,\n",
" \"options\": {\"num_predict\": 800},\n",
" \"stream\": False\n",
" }\n",
" response = requests.post(api_url, json=data)\n",
" # Extract text response (could be markdown table already)\n",
" return response.json().get(\"response\", \"\").strip()"
]
},
{
"cell_type": "markdown",
"id": "01400553-419c-4798-8f19-e32e49379761",
"metadata": {},
"source": [
"#### Enter your Language and Genre"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7527230-1e10-4b67-94c0-a84519b256c2",
"metadata": {},
"outputs": [],
"source": [
"language = input(\"Enter preferred language (e.g., French, Japanese): \").strip()\n",
"genre = input(\"Enter preferred genre (e.g., Drama, Comedy, Thriller): \").strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ff0146f-b37e-4218-9678-15a40bed3659",
"metadata": {},
"outputs": [],
"source": [
"recommendations_md = get_movie_recommendations(language, genre)\n",
"# This prints out the Markdown table as formatted by the Llama 3.2 model\n",
"from IPython.display import display, Markdown\n",
"\n",
"display(Markdown(recommendations_md))"
]
},
{
"cell_type": "markdown",
"id": "58cc0fa4-a2a6-4597-8ae9-39970fb2a7b5",
"metadata": {},
"source": [
"### The Result will be displayed in a markdown fashion in a neat table with rows and columns."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,56 @@
import os
import openai
from IPython.display import Markdown, display
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY") # Or set it directly
def scrape_website(url):
    """Fetch the fully rendered HTML of a page using headless Chromium.

    Args:
        url: The URL to load.

    Returns:
        The page's HTML markup as a string.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)
            return page.content()
        finally:
            # Close the browser even if navigation or content retrieval
            # raises, so a failed scrape doesn't leak a Chromium process.
            browser.close()
def summarize_content(html_content):
    """Summarize a webpage's visible text as markdown via the OpenAI API.

    Args:
        html_content: Raw HTML markup of the page to summarize.

    Returns:
        The model's markdown summary as a string.
    """
    # Get only the text parts of the webpage.
    soup = BeautifulSoup(html_content, 'html.parser')
    summary_text = soup.get_text(separator=' ', strip=True)

    system_prompt = "You summarize html content as markdown."
    user_prompt = (
        "You are a helpful assistant. Summarize the following HTML webpage content in markdown with simple terms:\n\n"
        + summary_text
    )
    # Bug fix: system_prompt was previously built but never sent — the API
    # call only included the user message, so the system instruction was
    # silently dropped. Send both roles.
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )
    return response.choices[0].message.content
def save_markdown(summary, filename="summary.md", url=None):
    """Write the summary to a markdown file.

    Args:
        summary: Markdown text to save; surrounding whitespace is stripped.
        filename: Destination path (defaults to "summary.md").
        url: Optional source URL; when given, the file's H1 links back to it.
    """
    title = f"# Summary of [{url}]({url})\n\n" if url else "# Summary\n\n"
    with open(filename, "w", encoding="utf-8") as outfile:
        outfile.write(title)
        outfile.write(summary.strip())
# 4. Main Logic
def main():
    """Prompt for a URL, scrape and summarize it, then save summary.md."""
    target_url = input("Enter the URL to summarize: ").strip()
    page_html = scrape_website(target_url)
    page_summary = summarize_content(page_html)
    save_markdown(page_summary, filename="summary.md", url=target_url)
    print("✅ Summary saved to summary.md")


# 5. Entry Point
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,34 @@
# Summary of [https://www.willwight.com/](https://www.willwight.com/)
# Will Wight - New York Times Best-Selling Author
### Overview
Will Wight is a renowned author known for the "Cradle" series, alongside other works like "The Last Horizon" and "The Traveler's Gate Trilogy." He combines humor and storytelling in his blog and engages actively with his readers.
### Books
- **The Last Horizon**: Currently ongoing series.
- **Cradle**: A 12-book series, now complete.
- **The Traveler's Gate Trilogy**: Completed series.
- **The Elder Empire**: Consists of two trilogies with stories happening simultaneously, totaling 6 books.
### Recent Highlights
- **The Pilot Release**: The fourth book in "The Last Horizon" series, celebrated on July 4th, 2025. The 26th book by Will, marking a milestone as his next book will be his 27th.
- **Barnes & Noble Success**: A significant achievement of getting Will's books stocked nationwide in Barnes & Noble, marking a breakthrough for indie publishing.
### Blog Highlights
- Will shares personal anecdotes and behind-the-scenes insights into his creative process.
- A humorous tone is used, including whimsical stories about his life and writing challenges.
- Recent experiences at Epic Universe theme park with thoughts on its design and offerings.
### Connect
- **Mailing List**: Over 15,000 fans subscribe to receive updates on new stories and releases.
- **Hidden Gnome Publishing**: The entity behind Will's publications, working to bring his books to wider audiences.
### Extras
- **Merch**: Available for fans wanting to support and connect with Will's universe.
- **Podcast**: Offers sneak peeks, discussions, and insights into Will's works.
### Humorous Note
Will humorously describes himself transforming into a "monstrous mongoose" during a full moon, adding a quirky touch to his persona.
For more detailed information on books, blogs, and extras, visit Will's website and explore his engaging world of storytelling!

View File

@@ -0,0 +1,181 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bbd8585e-0a28-4fd9-80b5-690569f93e16",
"metadata": {},
"outputs": [],
"source": [
"#This notebook will help you to get top tech products with by providing category and subcategory"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df039118-f462-4a8b-949e-53d3a726e292",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"aa"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2ffd2e5-d061-446c-891e-15a6d1958ab6",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92e26007-521f-4ea2-9df9-edd77dd7e183",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27d21593-8feb-42e4-bbc0-2e949b51137d",
"metadata": {},
"outputs": [],
"source": [
"def tech_product(category_subcategory_budget):\n",
" parts = category_subcategory_budget.split('_')\n",
" return f\"{parts[0]}-{parts[1]}-{parts[2]}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd978d25-5b84-4122-af7c-116f2bf72179",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(products):\n",
" return [\n",
" {\"role\": \"system\", \"content\": \"you are a tech product expert and you need to suggest the best suited product available in India basis the input received in the form of category-subcategory-budget (in inr),\\\n",
" revert with category and subcategory and show the product links as well along with pros and cons, respond in markdown\"},\n",
" {\"role\": \"user\", \"content\": tech_product(products)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b916db7a-81a4-41d9-87c2-a2346fd874d2",
"metadata": {},
"outputs": [],
"source": [
"messages_for(\"phone_gaming_40000\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b4bb3f1-95de-4eb5-afe1-068744f93301",
"metadata": {},
"outputs": [],
"source": [
"def get_top_products(category_subcategory):\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages= messages_for(category_subcategory)\n",
" )\n",
" return response.choices[0].message.content \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9272942-acfe-4fca-bd0a-3435c1ee6691",
"metadata": {},
"outputs": [],
"source": [
"get_top_products('phone_gaming_30000')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c2b3b9a-aceb-4f00-8c8d-8f6837ab94fc",
"metadata": {},
"outputs": [],
"source": [
"def display_markdown(category_subcategory_budget):\n",
" output = get_top_products(category_subcategory_budget)\n",
" display(Markdown(output))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c135dd7-4ed4-48ee-ba3f-9b4ca1c32149",
"metadata": {},
"outputs": [],
"source": [
"display_markdown('Console_gaming_100000')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ba06c55-7ef9-47eb-aeaf-3c4a7b29bccc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,223 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "fdc2f470",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5f0fbd79",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far!\n"
]
}
],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b771480a",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0e97974c",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2ec62fb3",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a list of the flights available according to what user asks for, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7a93a605",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a list of all the flights available in a table format in markdown. The columns of the table should be - Flight carrier, Flight Dat and times, Fare, No. of stops. \\\n",
"Provide exact flight carriers. If it includes ads or offers, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "597646e5",
"metadata": {},
"outputs": [],
"source": [
"def fetch_flights(from_tx,to_tx, date_from,date_to=''):\n",
" website = Website(f\"https://www.ca.kayak.com/flights/{from_tx}-{to_tx}/{date_from}/{date_to}\")\n",
" user_prompt = user_prompt_for(website)\n",
" messages = [{\"role\":\"system\",\"content\":system_prompt},{\"role\":\"user\",\"content\":user_prompt}]\n",
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
" display(Markdown(response.choices[0].message.content))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "47ae61f3",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Here is the list of available flights from YYZ to DEL on 9/11:\n",
"\n",
"| Flight Carrier | Flight Date and Times | Fare | No. of Stops |\n",
"|----------------|-----------------------|--------|--------------|\n",
"| Air Canada | 9/11, 10:00 AM | C$ 833 | 1 |\n",
"| Lufthansa | 9/11, 5:00 PM | C$ 847 | 2 |\n",
"| Qatar Airways | 9/11, 1:30 PM | C$ 1,559| 1 |\n",
"\n",
"### Summary of Offers\n",
"- The cheapest fare is C$ 833 with a travel time of 23 hours and 35 minutes.\n",
"- The best fare option is C$ 847 with a travel time of 22 hours and 20 minutes.\n",
"- The quickest option is priced at C$ 1,559 with a travel duration of 13 hours and 55 minutes. \n",
"\n",
"*Note: Prices are per person and do not include baggage fees.*"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fetch_flights('yyz','del','2025-11-09')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a48ceb6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "15bb1a04",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "902975bf",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,233 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# **Youtube Informative-video Summerizer**\n",
"\n",
"This python app let's you summerize youtube videos that contains information-sharing-through-talking, like someone talking about a subject, someone sharing a life advice, a podcast etc.\n",
"\n",
"We extract the transcipt analyize it with an LLM to summerize and create summerization and analysis.\n",
"\n",
"\n",
"> We use youtube_transcript_api which allows you to get the transcript text of any youtube video.\n",
"\n",
"> Results however are not ideal for our use case since it does not provide who says what in case of more than one speaker. it only provide one giant string of all the words said in the video respectivly with some noise.\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "4KULQ4rViju1"
}
},
{
"cell_type": "code",
"source": [
"#!pip install youtube-transcript-api"
],
"metadata": {
"id": "C21ZN5MNZ_1b"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from youtube_transcript_api import YouTubeTranscriptApi\n",
"from youtube_transcript_api.formatters import TextFormatter, SRTFormatter\n",
"import re\n",
"from openai import OpenAI\n",
"from google.colab import userdata # dotenv equevilant for google colab\n",
"from IPython.display import Markdown, display, update_display"
],
"metadata": {
"id": "ttbBAJC7Zrn5"
},
"execution_count": 35,
"outputs": []
},
{
"cell_type": "code",
"source": [
"ytt = YouTubeTranscriptApi()\n",
"formatter = TextFormatter() # --> Plain text\n",
"# formatter = SRTFormatter() # --> With timestamps\n",
"\n",
"openai_api_key = userdata.get('OPENAI_TOKEN')\n",
"openai_client = OpenAI(api_key=openai_api_key)\n",
"MODEL = \"gpt-4o-mini\""
],
"metadata": {
"id": "1oP0uPCylaig"
},
"execution_count": 36,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"id": "ILPjwpGkZm1t"
},
"outputs": [],
"source": [
"def get_video_id(url):\n",
" \"\"\"Extracts video ID from a YouTube URL.\"\"\"\n",
" regex = r\"(?:v=|\\/)([0-9A-Za-z_-]{11}).*\"\n",
" match = re.search(regex, url)\n",
" if match:\n",
" return match.group(1)\n",
" raise ValueError(\"Invalid YouTube URL\")\n",
"\n",
"\n",
"def get_transcript(url):\n",
" video_id = get_video_id(url)\n",
" fetched_transcript = ytt.fetch(video_id)\n",
" # ^ defaults to English transcript, for other language use:\n",
" # fetched = ytt.fetch(video_id, languages=['de', 'en'])\n",
" transcript_text = formatter.format_transcript(fetched_transcript)\n",
" return transcript_text"
]
},
{
"cell_type": "code",
"source": [
"system_prompt = \"\"\"You are an expert assistant specialized in analyzing podcast transcripts. You will be given the full transcript of a YouTube podcast episode.\n",
"\n",
"Your task is to extract and summarize the main views or arguments presented in the podcast. For each view or argument, also identify and list any supporting evidence such as:\n",
"\n",
"- Facts or statistics\n",
"- Academic studies or research\n",
"- Theories or philosophical frameworks\n",
"- Anecdotes or personal experiences\n",
"- Expert opinions or quotes\n",
"\n",
"Recognize off topic segments and adds and igrone them.\n",
"\n",
"Structure your output in a clear and concise format.\n",
"\n",
"Output Format:\n",
"\n",
"Podcast Summary:\n",
"\n",
"1. View/Argument:\n",
" - Description: [Summarize the view or claim in 1-2 sentences.]\n",
" - Supporting Evidence:\n",
" • [Fact, study, or reasoning #1]\n",
" • [Fact, study, or reasoning #2]\n",
" • [Optional counterarguments or nuances, if any]\n",
"\n",
"2. View/Argument:\n",
" - Description: [...]\n",
" - Supporting Evidence:\n",
" • [...]\n",
"\n",
"Guidelines:\n",
"- Only include major views or arguments that are discussed in depth.\n",
"- Paraphrase in clear, neutral, and objective language.\n",
"- Do not include filler, small talk, or off-topic segments.\n",
"- If a claim lacks explicit evidence, note it as “No clear supporting evidence provided.”\n",
"\n",
"Always respond and orginize your response using Markdow.\n",
"\"\"\"\n"
],
"metadata": {
"id": "Ye3m_3lEejb_"
},
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def get_user_prompt(title,url):\n",
" prompt = f\"Following is a transcript for a podcast titled '{title}' \\n\"\n",
" prompt += \"Carefully read through this content, analyse and summerize it as told, respond in Markdown.\"\n",
" prompt += \"\\nTranscript: \\n\\n\"\n",
" prompt += get_transcript(url)\n",
" return prompt"
],
"metadata": {
"id": "1jk6YbkpupqI"
},
"execution_count": 39,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# user_prompt = get_user_prompt()\n",
"def summerize_video(title,url):\n",
" user_prompt = get_user_prompt(title,url)\n",
" stream = openai_client.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt},\n",
" ],\n",
" stream = True,\n",
" )\n",
"\n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
],
"metadata": {
"id": "wJy0Qb8u9uqR"
},
"execution_count": 40,
"outputs": []
},
{
"cell_type": "code",
"source": [
"summerize_video(\"Anti-Aging Expert: Missing This Vitamin Is As Bad As Smoking! The Truth About Creatine!\",\"https://www.youtube.com/watch?v=JCTb3QSrGMQ\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "tbvBiPrv_O3i",
"outputId": "69d24254-e384-4b07-e35f-96c7bb733298"
},
"execution_count": 41,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "# Podcast Summary: \"Anti-Aging Expert: Missing This Vitamin Is As Bad As Smoking! The Truth About Creatine!\"\n\n1. **View/Argument: Vitamin D and Health Risks**\n - **Description:** Vitamin D deficiency significantly increases the risk of dementia and various health issues, yet many individuals are unaware of its critical importance.\n - **Supporting Evidence:**\n - Vitamin D deficiency can raise dementia risk by 80%.\n - Individuals with adequate vitamin D have a 40% reduced risk of dementia and experience better cognitive function.\n\n2. **View/Argument: Role of Lifestyle in Aging**\n - **Description:** Lifestyle choices account for over 70% of aging effects, with exercise and nutrition being key factors in improving longevity and health.\n - **Supporting Evidence:**\n - Studies show participants involved in regular exercise did not experience hippocampal shrinkage, but rather an increase in size.\n - Exercise is equated to a miracle drug for its extensive health benefits, as highlighted by unquantifiable positive effects when compared to medications.\n\n3. **View/Argument: Importance of Magnesium**\n - **Description:** Magnesium is crucial for cellular function, metabolism, and reducing cancer risk, yet nearly half the U.S. population is magnesium deficient.\n - **Supporting Evidence:**\n - Individuals with the highest magnesium levels have a 40% lower all-cause mortality.\n - A 24% increase in pancreatic cancer incidents is associated with every 100 mg decrease in magnesium intake.\n\n4. 
**View/Argument: Benefits of Creatine in Brain Health**\n - **Description:** Creatine isn't just beneficial for muscle health but also shows promise for enhancing cognitive performance, especially under stress or sleep deprivation.\n - **Supporting Evidence:**\n - A study found that creatine can negate cognitive deficits caused by 21 hours of sleep deprivation.\n - Users often report improved focus and energy levels when supplementing with creatine regularly.\n\n5. **View/Argument: Exercise and Hormonal Benefits**\n - **Description:** Regular exercise, especially high-intensity interval training, can reverse heart aging and improve mental health markers.\n - **Supporting Evidence:**\n - Participants in an intensive exercise program showed heart structures that were more akin to those of individuals two decades younger.\n - High-intensity workouts were shown to improve cognition and neuroplasticity due to the metabolic changes they induce.\n\n6. **View/Argument: Impact of Nutrition on Cognitive Function**\n - **Description:** A healthy diet rich in omega-3 fatty acids, vitamins D and other nutrients is essential for maintaining cognitive function and overall health.\n - **Supporting Evidence:**\n - Adequate omega-3 intake has been linked to a 5-year increase in life expectancy.\n - Regular consumption of nutrient-rich foods, such as blueberries and dark leafy greens, supports cognition and potentially reduces the risk of neurodegenerative diseases.\n\n7. **View/Argument: The Importance of Autophagy**\n - **Description:** Fasting promotes autophagy, a cellular cleaning process that can protect against diseases and improve health.\n - **Supporting Evidence:**\n - Studies suggest that fasting for 16 hours can activate autophagy and contribute to cellular repair processes.\n\n8. 
**View/Argument: Intermittent Fasting and Health Improvements**\n - **Description:** Intermittent fasting can improve metabolic parameters and cognitive performance while providing health benefits beyond simple calorie restriction.\n - **Supporting Evidence:**\n - Individuals practicing intermittent fasting showed improved glucose regulation compared to those restricting calories alone without fasting.\n\n9. **View/Argument: Microplastics and Health Risks**\n - **Description:** The pervasive presence of microplastics in everyday products poses health risks that are not widely recognized.\n - **Supporting Evidence:**\n - Common items, such as paper coffee cups and plastic water bottles, can release harmful chemicals, leading to increased levels of substances like BPA in beverages.\n\nBy summarizing these key points, the podcast emphasizes the interconnectedness of nutrition, exercise, and mental well-being in managing aging and chronic diseases. Additionally, it highlights emerging research on creatine, fasting, and environmental health risks that affect longevity and quality of life."
},
"metadata": {}
}
]
}
]
}

View File

@@ -0,0 +1,210 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
.*-env

View File

@@ -0,0 +1,207 @@
from ai_core import AICore
from ai_brochure_config import AIBrochureConfig
from extractor_of_relevant_links import ExtractorOfRelevantLinks
from website import Website
from openai.types.responses import Response
from rich.console import Console
from rich.markdown import Markdown
from requests import Session
from concurrent.futures import ThreadPoolExecutor, as_completed
from json import loads
class BrochureCreator(AICore[str]):
    """
    Builds a short Markdown brochure for a company or individual by:
    - extracting relevant links from the website,
    - inferring the entity name and status,
    - and prompting the model using the collected page content.
    """

    @property
    def _website(self) -> Website:
        """Return the main Website instance to analyze."""
        return self.__website

    @property
    def _extractor(self) -> ExtractorOfRelevantLinks:
        """Return the helper responsible for extracting relevant links."""
        return self.__extractor

    def __init__(self, config: AIBrochureConfig, website: Website) -> None:
        """
        Initialize the brochure creator with configuration and target website.

        Parameters:
            config: AI and runtime configuration.
            website: The root website to analyze and summarize.
        """
        system_behavior: str = ("You are an assistant that analyzes the contents of several relevant pages from a company website "
                                "and creates a short brochure about the company for prospective customers, investors and recruits. "
                                "Include details of company culture, customers and careers/jobs if information is available. ")
        super().__init__(config, system_behavior)
        self.__website: Website = website
        self.__extractor: ExtractorOfRelevantLinks = ExtractorOfRelevantLinks(config, website)

    def create_brochure(self) -> str:
        """
        Create a short Markdown brochure based on the website's content.

        Returns:
            A Markdown string with the brochure, or a fallback message if no relevant pages were found.
        """
        relevant_pages: list[dict[str, str | Website]] = self._get_relevant_pages()
        if not relevant_pages:
            return "No relevant pages found to create a brochure."
        brochure_prompt_part: str = self._form_brochure_prompt(relevant_pages)
        # _infer_entity sends the page excerpts through ask(), which records them
        # in the chat history; the final prompt relies on that history instead of
        # repeating the content.
        inferred_company_name, inferred_status = self._infer_entity(brochure_prompt_part)
        full_brochure_prompt: str = self._form_full_prompt(inferred_company_name, inferred_status)
        response: str = self.ask(full_brochure_prompt)
        return response

    def _get_relevant_pages(self) -> list[dict[str, str | Website]]:
        """
        Resolve relevant links into Website objects using a shared session and concurrency.

        Returns:
            A list of {"type": <link type>, "page": Website} entries; links that
            fail to fetch are silently dropped.
        """
        relevant_pages: list[dict[str, str | Website]] = []
        relevant_links: list[dict[str, str]] = self._extractor.extract_relevant_links()["links"]
        # Limit the number of pages to fetch to keep latency and token usage reasonable.
        MAX_PAGES: int = 6
        links_subset = relevant_links[:MAX_PAGES]

        def build_page(item: dict[str, str], session: Session) -> dict[str, str | Website] | None:
            # Fetch one linked page; a failure drops that page rather than
            # aborting the whole batch.
            try:
                url = str(item["url"])
                page_type = str(item["type"])
                return {"type": page_type, "page": Website(url, session=session)}
            except Exception:
                return None

        # NOTE(review): a single requests.Session is shared by up to 4 worker
        # threads; this commonly works for simple GETs but Session is not
        # documented as thread-safe — confirm acceptable.
        with Session() as session, ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(build_page, link, session) for link in links_subset]
            for fut in as_completed(futures):
                res = fut.result()
                if res:
                    relevant_pages.append(res)
        return relevant_pages

    def _truncate_text(self, text: str, limit: int) -> str:
        """
        Truncate text to 'limit' characters to reduce tokens and latency.
        """
        if len(text) <= limit:
            return text
        # Reserve room for the marker so the result stays near the limit.
        return text[: max(0, limit - 20)] + "... [truncated]"

    def _form_brochure_prompt(self, relevant_pages: list[dict[str, str | Website]]) -> str:
        """
        Assemble a prompt that includes the main page and relevant pages' titles and text.

        Parameters:
            relevant_pages: List of page descriptors returned by _get_relevant_pages.

        Returns:
            A prompt string containing quoted sections per page.
        """
        QUOTE_DELIMITER: str = "\n\"\"\"\n"
        # The main page gets a larger budget than each secondary page.
        MAX_MAIN_CHARS = 6000
        MAX_PAGE_CHARS = 3000
        prompt: str = (
            f"Main page:{QUOTE_DELIMITER}"
            f"Title: {self._website.title}\n"
            f"Text:\n{self._truncate_text(self._website.text, MAX_MAIN_CHARS)}{QUOTE_DELIMITER}\n"
        )
        for page in relevant_pages:
            # Skip entries whose fetch failed; their text would be empty/invalid.
            if isinstance(page['page'], Website) and not page['page'].fetch_failed:
                prompt += (
                    f"{page['type']}:{QUOTE_DELIMITER}"
                    f"Title: {page['page'].title}\n"
                    f"Text:\n{self._truncate_text(page['page'].text, MAX_PAGE_CHARS)}{QUOTE_DELIMITER}\n"
                )
        return prompt

    def _infer_entity(self, brochure_prompt_part: str) -> tuple[str, str]:
        """
        Infer both the entity name and status in a single model call to reduce latency.

        Returns:
            (name, status) where status is 'company' or 'individual'.
        """
        prompt = (
            "From the following website excerpts, infer the entity name and whether it is a company or an individual. "
            "Respond strictly as JSON with keys 'name' and 'status' (status must be 'company' or 'individual').\n"
            f"{brochure_prompt_part}"
        )
        raw = self.ask(prompt)
        # Models often wrap JSON in markdown fences or surrounding prose even
        # when told not to; extract the outermost {...} span before parsing so
        # such replies still parse instead of falling into the name fallback.
        start = raw.find("{")
        end = raw.rfind("}")
        candidate = raw[start:end + 1] if start != -1 and end > start else raw
        try:
            data: dict[str, str] = loads(candidate)
            name: str = str(data.get("name", "")).strip() or "Unknown"
            status: str = str(data.get("status", "")).strip().lower()
            if status not in ("company", "individual"):
                status = "company"
            return name, status
        except Exception:
            # Fallback: use entire output as name, assume company
            return raw.strip() or "Unknown", "company"

    def _form_full_prompt(self, inferred_company_name: str, inferred_status: str) -> str:
        """
        Build the final brochure-generation prompt using the inferred entity and prior history.

        Parameters:
            inferred_company_name: The inferred entity name.
            inferred_status: Either 'company' or 'individual'.

        Returns:
            A final prompt instructing the model to produce a Markdown brochure.
        """
        full_prompt: str = (f"You are looking at a {inferred_status} called {inferred_company_name}, to whom website {self._website.website_url} belongs.\n"
                            f"Build a short brochure about the {inferred_status}. Use the information from the website that is already stored in the history.\n"
                            "Your response must be in a Markdown format.")
        return full_prompt

    def ask(self, question: str) -> str:
        """
        Send a question to the model, update chat history, and return the text output.

        Parameters:
            question: The user prompt.

        Returns:
            The model output text.
        """
        self.history_manager.add_user_message(question)
        response: Response = self._ai_api.responses.create(
            model=self.config.model_name,
            instructions=self.history_manager.system_behavior,
            input=self.history_manager.chat_history,
            reasoning={ "effort": "low" }
        )
        self.history_manager.add_assistant_message(response)
        return response.output_text
# Shared rich console used by all display helpers in this module.
console: Console = Console()


def display_markdown(content: str) -> None:
    """Render a Markdown string to the terminal via the shared rich console."""
    rendered = Markdown(content)
    console.print(rendered)
def show_summary(summary: str) -> None:
    """
    Print a Markdown summary if provided; otherwise print a fallback message.

    Parameters:
        summary: Markdown text to render; falsy values trigger the fallback.
    """
    # Guard clause: bail out early when there is nothing to render.
    if not summary:
        console.print("No summary found.")
        return
    display_markdown(summary)
if __name__ == "__main__":
    # Entry point: fetch the target site, then generate and render the brochure.
    website: Website = Website("<put your site address here>")  # replace the placeholder with a real URL before running
    brochure_creator: BrochureCreator = BrochureCreator(AIBrochureConfig(), website)
    brochure: str = brochure_creator.create_brochure()
    display_markdown(brochure)

View File

@@ -0,0 +1,59 @@
import os
from dotenv import load_dotenv
class AIBrochureConfig:
    """
    Configuration loader backed by environment variables (optionally populated
    from a dotenv file). Values are resolved lazily and cached on first access.
    """

    def __init__(self, dotenv_path: str = ".env") -> None:
        """
        Load environment variables and prepare the lazy-value caches.

        Parameters:
            dotenv_path: Path to the dotenv file to load (default: ".env").
                Parameterized so tests and alternate deployments can point at
                a different file; the default preserves prior behavior.
        """
        load_dotenv(dotenv_path=dotenv_path)
        # Empty string means "not resolved yet" for the lazy properties below.
        self.__openai_api_key: str = ""
        self.__model_name: str = ""

    def __get_config_value(self, key: str) -> str:
        """
        Fetch a required environment variable.

        Parameters:
            key: Name of the environment variable.

        Returns:
            The non-empty value of the variable.

        Raises:
            ValueError: If the key is empty, or the variable is missing/blank.
        """
        if not key:
            raise ValueError("Key must be provided")
        value: str | None = os.getenv(key)
        if not value:
            raise ValueError(f"Environment variable '{key}' not found")
        return value

    def _get_str(self, key: str) -> str:
        """Get a string value from the environment variables."""
        return self.__get_config_value(key)

    def _get_int(self, key: str) -> int:
        """
        Get an integer value from the environment variables.

        Raises:
            ValueError: If the variable is missing or not a valid integer.
        """
        value = self.__get_config_value(key)
        try:
            return int(value)
        except ValueError as exc:
            # Chain the original error so the offending raw value is not lost.
            raise ValueError(f"Environment variable '{key}' must be an integer") from exc

    @property
    def openai_api_key(self) -> str:
        """OpenAI API key, read from OPENAI_API_KEY on first access and cached."""
        if self.__openai_api_key == "":
            self.__openai_api_key = self._get_str("OPENAI_API_KEY")
        return self.__openai_api_key

    @property
    def model_name(self) -> str:
        """Model name, read from MODEL_NAME on first access and cached."""
        if self.__model_name == "":
            self.__model_name = self._get_str("MODEL_NAME")
        return self.__model_name

View File

@@ -0,0 +1,181 @@
import openai
from abc import ABC, abstractmethod
from ai_brochure_config import AIBrochureConfig
from typing import Any, cast, Generic, TypeVar
from openai.types.responses import ResponseInputItemParam, Response, ResponseOutputMessage
TAiResponse = TypeVar('TAiResponse', default=Any)
class HistoryManager:
    """
    Track the system instructions and the running conversation for one model session.
    """

    def __init__(self, system_behavior: str) -> None:
        """
        Initialize the history manager.

        Parameters:
            system_behavior: The system instruction string for the conversation.
        """
        self.__system_behavior: str = system_behavior
        self.__chat_history: list[ResponseInputItemParam] = []

    @property
    def chat_history(self) -> list[ResponseInputItemParam]:
        """The accumulated conversation as a list of response input items."""
        return self.__chat_history

    @property
    def system_behavior(self) -> str:
        """The system behavior (instructions) used for this conversation."""
        return self.__system_behavior

    def add_user_message(self, message: str) -> None:
        """
        Append a user message to the chat history.

        Parameters:
            message: The user text to add.
        """
        user_item = {
            "role": "user",
            "content": [{"type": "input_text", "text": message}],
        }
        self.__chat_history.append(user_item)

    def add_assistant_message(self, output_message: Response) -> None:
        """
        Append the assistant's output items to the chat history.

        Parameters:
            output_message: The model response whose output items are converted
                from Pydantic models into input-item dicts and stored.
        """
        converted_items = [
            cast(ResponseInputItemParam, item.model_dump(exclude_unset=True))
            for item in output_message.output
        ]
        self.__chat_history.extend(converted_items)
class AICore(ABC, Generic[TAiResponse]):
    """
    Abstract base class for AI core functionality: configuration handling,
    lazy OpenAI client construction, and chat-history management.
    """

    def __init__(self, config: AIBrochureConfig, system_behavior: str) -> None:
        """
        Initializes the AI core with the provided configuration.

        Parameters:
            config (AIBrochureConfig): The configuration object for the AI core.
                A None value (possible at runtime despite the annotation) is
                normalized to a default AIBrochureConfig by the `config` setter.
            system_behavior (str): The system instruction string for the conversation.
        """
        # Route the assignment through the `config` setter so None is normalized
        # to a default config, keeping __init__ consistent with later reassignment.
        # (Previously __config was set directly, bypassing that normalization.)
        self.config = config
        self.__history_manager: HistoryManager = HistoryManager(system_behavior)
        self.__ai_api: openai.OpenAI | None = None
        if __debug__:
            # Sanity check: confirm attributes are initialized
            assert hasattr(self, "_AICore__config")
            assert hasattr(self, "_AICore__history_manager")
            assert hasattr(self, "_AICore__ai_api")

    @property
    def config(self) -> AIBrochureConfig:
        """
        Return the stored AIBrochureConfig for this instance.

        Notes:
            - This returns the internal reference; mutating the returned object
              affects this instance. Use the setter to replace the configuration.
        """
        return self.__config

    @config.setter
    def config(self, config: AIBrochureConfig | None) -> None:
        """
        Set the instance configuration.

        Parameters:
            config: The configuration to assign. If None, a newly created
                default AIBrochureConfig is used instead.
        """
        self.__config = AIBrochureConfig() if config is None else config

    @property
    def _ai_api(self) -> openai.OpenAI:
        """
        Return the cached OpenAI API client, initializing it on first access.

        The client is built lazily from self.config.openai_api_key and cached
        on self.__ai_api; subsequent calls return the same instance.

        Returns:
            openai.OpenAI: A configured OpenAI API client.

        Raises:
            ValueError: If no configuration is set when the client is first built.

        Notes:
            - Private implementation detail; mutates self.__ai_api as a cache.
            - Not thread-safe: concurrent first access may build multiple clients.
        """
        if self.__ai_api is None:
            if self.config is None:
                raise ValueError("Configuration must be set before accessing AI API")
            self.__ai_api = openai.OpenAI(api_key=self.config.openai_api_key)
        return self.__ai_api

    @property
    def history_manager(self) -> HistoryManager:
        """
        Return the history manager for this AI core instance.

        Returns:
            HistoryManager: Tracks the chat history and system behavior.
                Always a HistoryManager instance, never None.
        """
        return self.__history_manager

    @abstractmethod
    def ask(self, question: str) -> TAiResponse:
        """
        Ask a question to the AI model.

        Parameters:
            question: The question to ask.

        Returns:
            TAiResponse: The model's response type defined by the subclass.
        """
        pass

View File

@@ -0,0 +1,91 @@
from ai_brochure_config import AIBrochureConfig
from website import Website
from ai_core import AICore
from openai.types.responses import Response
from json import loads
RelevantLinksDict = dict[str, list[dict[str, str]]]
class ExtractorOfRelevantLinks(AICore[RelevantLinksDict]):
    """
    Extractor for relevant links from a website.
    """

    @property
    def website(self) -> Website:
        """Return the root Website whose links are being analyzed."""
        return self.__website

    def __init__(self, config: AIBrochureConfig, website: Website) -> None:
        """
        Initialize the extractor with configuration and target website.

        Parameters:
            config: AI and runtime configuration.
            website: The Website from which links were collected.
        """
        # Fix: adjacent string literals previously had no separating spaces,
        # fusing sentences in the prompt ("materials.You are going...").
        # Each fragment now ends with an explicit space or newline.
        system_behavior: str = ("You are an expert in creation of online advertisement materials. "
                                "You are going to be provided with a list of links found on a website. "
                                "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
                                "such as links to an About page or a Company page or Careers/Jobs pages.\n"
                                "You should respond in JSON as in this example:")
        system_behavior += """
{
    "links": [
        {"type": "about page", "url": "https://www.example.com/about"},
        {"type": "company page", "url": "https://www.another_example.net/company"},
        {"type": "careers page", "url": "https://ex.one_more_example.org/careers"}
    ]
}
"""
        super().__init__(config, system_behavior)
        self.__website: Website = website

    def get_links_user_prompt(self) -> str:
        """
        Build a user prompt listing discovered links and instructions for relevance filtering.

        Returns:
            A string to send to the model listing links and guidance.
        """
        # Fix: add the missing space between "...company." and "Respond..." that
        # the original literal concatenation dropped.
        starter_part: str = (f"Here is a list of links found on the website of {self.website.website_url} - "
                             "please decide which of these links are relevant web links for a brochure about company. "
                             "Respond with full HTTPS URLs. Avoid including Terms of Service, Privacy, email links.\n"
                             "Links (some might be relative links):\n")
        links_part: str = "\n".join(f"- {link}" for link in self.website.links_on_page) if self.website.links_on_page else "No links found."
        return starter_part + links_part

    def extract_relevant_links(self) -> RelevantLinksDict:
        """
        Request the model to select relevant links for brochure creation.

        Returns:
            A dictionary with a 'links' array containing objects with 'type' and 'url'.
        """
        user_prompt = self.get_links_user_prompt()
        response = self.ask(user_prompt)
        return response

    def ask(self, question: str) -> RelevantLinksDict:
        """
        Send a question to the model and parse the JSON response.

        Parameters:
            question: The prompt to submit.

        Returns:
            RelevantLinksDict: Parsed JSON containing selected links.

        Raises:
            json.JSONDecodeError: If the model output is not valid JSON.
        """
        self.history_manager.add_user_message(question)
        response: Response = self._ai_api.responses.create(
            model=self.config.model_name,
            instructions=self.history_manager.system_behavior,
            reasoning={ "effort": "low" },
            input=self.history_manager.chat_history
        )
        self.history_manager.add_assistant_message(response)
        return loads(response.output_text)

View File

@@ -0,0 +1,5 @@
python-dotenv
openai
beautifulsoup4
requests
rich

View File

@@ -0,0 +1,286 @@
from ipaddress import ip_address, IPv4Address, IPv6Address
from urllib.parse import ParseResult, urlparse
from bs4 import BeautifulSoup, Tag
from requests import get, RequestException, Session
class Extractor:
    """
    Extracts and processes content from HTML response text using BeautifulSoup.
    """

    def __init__(self, response_text_content: str) -> None:
        """
        Initializes the Extractor with HTML response text.

        Parameters:
            response_text_content (str): The HTML response text to be processed.
        """
        self.__soup: BeautifulSoup = BeautifulSoup(response_text_content, "html.parser")
        # Per-instance caches (previously class-level attributes, which risked
        # stale/shared state across instances).
        self.__extracted_title: str = ""
        self.__extracted_text: str = ""
        self.__extracted_links_on_page: list[str] | None = None

    @property
    def extracted_title(self) -> str:
        """
        Returns the extracted title from the HTML content (computed once, cached).
        """
        if not self.__extracted_title:
            self.__extracted_title = self.get_title()
        return self.__extracted_title

    @property
    def extracted_text(self) -> str:
        """
        Returns the extracted main text content from the HTML, excluding
        irrelevant tags (computed once, cached).
        """
        if not self.__extracted_text:
            self.__extracted_text = self.get_text()
        return self.__extracted_text

    @property
    def extracted_links_on_page(self) -> list[str]:
        """
        Return all href values found on the page.

        Notes:
            - Only anchor tags with an href are included.
            - Values are returned as-is (may be relative or absolute).
        """
        if self.__extracted_links_on_page is None:
            self.__extracted_links_on_page = [
                str(a.get("href"))
                for a in self._soup.find_all('a', href=True)
                if isinstance(a, Tag)
            ]
        return self.__extracted_links_on_page

    @property
    def _soup(self) -> BeautifulSoup:
        """
        Returns the BeautifulSoup object for the HTML content.
        """
        return self.__soup

    def get_title(self) -> str:
        """
        Extracts the title from the HTML content.
        """
        return self._soup.title.get_text() if self._soup.title is not None else "No title"

    def get_text(self) -> str:
        """
        Extracts and cleans the main text content from the HTML, removing irrelevant tags.

        Fix: operate on a copy of the parsed tree. The previous implementation
        called decompose() on the shared soup, so extracting text first silently
        removed anchors inside stripped tags (e.g. <form>) from any later
        extracted_links_on_page / get_title access.
        """
        from copy import copy as _copy  # local import keeps module-level imports untouched
        working_soup: BeautifulSoup = _copy(self._soup)
        for irrelevant in working_soup.find_all(["script", "style", "img", "figure", "video", "audio", "button", "svg", "canvas", "input", "form", "meta"]):
            irrelevant.decompose()
        raw_text: str = working_soup.get_text(separator="\n")
        cleaned_text: str = " ".join(raw_text.split())
        return cleaned_text if cleaned_text else "No content"
class Website:
    """
    A class to represent a website: validates the URL, fetches the page,
    and exposes its title, text, and links.
    """

    # ".io" added so the default matches the suffixes the old hardcoded check
    # (see __is_allowed_domain fix below) used to accept.
    __DEFAULT_ALLOWED_DOMAINS: list[str] = [".com", ".org", ".net", ".io"]

    def __init__(self, website_url: str, allowed_domains: list[str] | str | None = None, session: Session | None = None) -> None:
        """
        Initializes the Website object and fetches its data.

        Parameters:
            website_url (str): The URL of the website to fetch.
            allowed_domains (list[str] | str, optional): A list of allowed domain suffixes.
                If a string is provided, it should be a comma-separated list of domain
                suffixes (e.g., ".com,.org,.net").
            session (requests.Session | None, optional): Reused HTTP session for connection pooling.
        """
        # All state is instance-level (previously some defaults lived on the
        # class, risking shared mutable state).
        self.__title: str = ""
        self.__text: str = ""
        self.__website_url: str = ""
        self.__links_on_page: list[str] | None = None
        self.__fetch_failed: bool = False
        self.__session: Session | None = session
        if allowed_domains is None:
            self._allowed_domains = self.__DEFAULT_ALLOWED_DOMAINS.copy()
        else:
            self._allowed_domains = allowed_domains
        # Use protected setter internally so the public API exposes only the getter.
        self._set_website_url(website_url)

    @property
    def title(self) -> str:
        """Returns the title of the website."""
        return self.__title

    @property
    def text(self) -> str:
        """Returns the main text content of the website."""
        return self.__text

    @property
    def website_url(self) -> str:
        """Returns the URL of the website."""
        return self.__website_url

    @property
    def links_on_page(self) -> list[str] | None:
        """Returns the list of links extracted from the website."""
        return self.__links_on_page

    @property
    def fetch_failed(self) -> bool:
        """Returns whether the website data fetch failed."""
        return self.__fetch_failed

    @property
    def _allowed_domains(self) -> list[str]:
        """Returns the list of allowed domain suffixes."""
        return self.__allowed_domains

    @_allowed_domains.setter
    def _allowed_domains(self, value: list[str] | str) -> None:
        """
        Sets the list of allowed domain suffixes.

        Filters out empty strings and ensures each suffix starts with a dot.
        """
        if isinstance(value, str):
            value = [
                item.strip() if item.strip().startswith(".") else f".{item.strip()}"
                for item in value.split(",")
                if item.strip()
            ]
        else:
            value = [
                item if item.startswith(".") else f".{item}"
                for item in value
                if item
            ]
        self.__allowed_domains = value

    def _set_website_url(self, value: str) -> None:
        """
        Protected: set the website URL after validating and fetch website data.

        Use this from inside the class to initialize or change the URL.

        Raises:
            ValueError: If the URL is empty or fails validation.
        """
        if not value:
            raise ValueError("Website URL must be provided")
        parsed_url: ParseResult = urlparse(value)
        self._validate(parsed_url)
        self.__website_url = value
        self.__fetch_website_data()

    def _validate(self, parsed_url: ParseResult) -> None:
        """
        Validate the parsed URL.

        Parameters:
            parsed_url: The parsed URL to validate.

        Raises:
            ValueError: If the URL is missing parts, uses an invalid scheme,
                points to a local/private address, or is not in allowed domains.
        """
        if not parsed_url.netloc or parsed_url.scheme not in ("http", "https"):
            raise ValueError("Website URL must be a valid URL")
        if not parsed_url.hostname:
            raise ValueError("Website URL must contain a valid hostname")
        if self.__is_local_address(parsed_url.hostname):
            raise ValueError("Website URL must not be a local address")
        if not self.__is_allowed_domain(parsed_url.hostname):
            raise ValueError("Website URL must be an allowed domain")

    def __is_local_address(self, hostname: str) -> bool:
        """
        Check if the given hostname is a local address.

        Parameters:
            hostname (str): The hostname to check.

        Returns:
            bool: True if the hostname is a local/private/reserved address.
        """
        if hostname in ("localhost", "127.0.0.1", "::1"):
            return True
        try:
            ip: IPv4Address | IPv6Address = ip_address(hostname)
            if ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_reserved:
                return True
        except ValueError:
            # Hostname is not an IP literal; DNS names are not checked here.
            return False
        return False

    def __is_allowed_domain(self, hostname: str) -> bool:
        """
        Check if the given hostname ends with one of the configured suffixes.

        Fix: the previous implementation compared against a hardcoded list,
        silently ignoring the allowed_domains constructor argument.

        Parameters:
            hostname (str): The hostname to check.

        Returns:
            bool: True if the hostname matches an allowed suffix.
        """
        return any(hostname.endswith(domain) for domain in self._allowed_domains)

    def __fetch_website_data(self) -> None:
        """
        Fetch website content and populate title, text, and links.

        Side effects:
            - Sets internal state: __title, __text, __links_on_page, __fetch_failed.
            - Performs an HTTP GET with a browser-like User-Agent.
        """
        try:
            # Prefer the pooled session when one was supplied.
            get_fn = self.__session.get if self.__session else get
            response = get_fn(
                self.website_url,
                timeout=10,
                verify=True,
                headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"}
            )
        except RequestException as e:
            self.__title = "Error"
            self.__text = str(e)
            self.__fetch_failed = True
            return
        if response.ok:
            extractor: Extractor = Extractor(response.text)
            self.__title = extractor.extracted_title
            self.__text = extractor.extracted_text
            self.__links_on_page = extractor.extracted_links_on_page
        else:
            if response.status_code == 404:
                self.__title = "Not Found"
                self.__text = "The requested page was not found (404)."
            else:
                self.__title = "Error"
                self.__text = f"Error: {response.status_code} - {response.reason}"
            self.__fetch_failed = True

    def __str__(self) -> str:
        """
        Returns a string representation of the Website object.
        """
        return f"Website(title={self.title}, url={self.website_url})"

View File

@@ -0,0 +1,402 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9905f163-759f-474b-8f7a-7d14da0df44d",
"metadata": {},
"source": [
"### BUSINESS CHALLENGE: Using Multi-shot Prompting\n",
"#### Day 5\n",
"\n",
"Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.\n",
"\n",
"We will be provided a company name and their primary website."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0895f24-65ff-4624-8ae0-15d2d400d8f0",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
"\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7794aa70-5962-4669-b86f-b53639f4f9ea",
"metadata": {},
"outputs": [],
"source": [
"# Initialize and constants\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63bf8631-2746-4255-bec1-522855d3e812",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "markdown",
"id": "1e7bb527-e769-4245-bb91-ae65e64593ff",
"metadata": {},
"source": [
"## First step: Have GPT-4o-mini figure out which links are relevant\n",
"\n",
"### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ce303ae-b967-4261-aadc-02dafa54db4a",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d24a4c0c-a1d1-4897-b2a7-4128d25c2e08",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8103fc11-5bc0-41c4-8c97-502c9e96429c",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url): # 1st inference\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc84a695-515d-4292-9a95-818f4fe3d20e",
"metadata": {},
"outputs": [],
"source": [
"huggingface = Website(\"https://huggingface.co\")"
]
},
{
"cell_type": "markdown",
"id": "91896908-1632-41fc-9b8b-39a7638d8dd1",
"metadata": {},
"source": [
"## Second step: make the brochure!\n",
"\n",
"Assemble all the details into another prompt to GPT-4o"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab7c54e3-e654-4b1f-8671-09194b628aa0",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url): # 1st inference wrapper\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url) # inference\n",
" # print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea9f54d1-a248-4c56-a1de-6633193de5bf",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13412c85-badd-4d79-a5ac-8283e4bb832f",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url) # inference wrapper\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "addc0047-ea73-4748-abc3-747ff343c134",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url): # 2nd inference\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a3b61a-da26-4265-840a-0a93f81cd048",
"metadata": {},
"outputs": [],
"source": [
"brochure_english = create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d165e3f-8fe2-4712-b098-d34d9fabe583",
"metadata": {},
"outputs": [],
"source": [
"display(Markdown(brochure_english))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "107a2100-3f7d-4f16-8ba7-b5da602393c6",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure(company_name, url):\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26cbe9b5-3603-49a1-a676-75c7ddaacdb8",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "markdown",
"id": "c10d8189-7f79-4991-abc4-0764369b7d64",
"metadata": {},
"source": [
"### Third step: Translate the entire brochure to Spanish"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "666817eb-1e8b-4fee-bbab-c0dbfe2ea7c0",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of a brochure \\\n",
"and translates to Spanish. Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c48adb12-bc3c-48f9-ab38-b7ca895195f6",
"metadata": {},
"outputs": [],
"source": [
"def translate_user_prompt(company_name, url):\n",
" user_prompt = f\"Please translate the following brochure content to Spanish\\n\"\n",
" user_prompt += create_brochure(company_name, url) # inference wrapper\n",
" # user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b92b61ac-3be3-4e84-9000-ec8233697b81",
"metadata": {},
"outputs": [],
"source": [
"translate_user_prompt(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bfd04f4-4381-4730-ac5d-c9fa02f906df",
"metadata": {},
"outputs": [],
"source": [
"def translate_brochure(): # 3rd inference\n",
" stream = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": translate_user_prompt(\"HuggingFace\", \"https://huggingface.co\")}\n",
" ],\n",
" stream=True\n",
" )\n",
" \n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id=True)\n",
" for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb78ed28-9ecd-4c08-ae96-d7473cbc97dd",
"metadata": {},
"outputs": [],
"source": [
"translate_brochure()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,270 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f60dab2a-a377-4761-8be3-69a3b8124ca6",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"import pdfplumber\n",
"import re\n",
"import json\n",
"\n",
"def parse_transaction_line(line):\n",
"    \"\"\"Parse one bank-account statement line: date, description, amount, running balance.\"\"\"\n",
"    pattern = r'^(\\d{2}/\\d{2})\\s+(.+?)\\s+(-?[\\d,]+\\.\\d{2})\\s+(-?[\\d,]+\\.\\d{2})$'\n",
"    match = re.match(pattern, line.strip())\n",
"\n",
"    if match:\n",
"        date, description, amount, balance = match.groups()\n",
"        return {\n",
"            'date': date,\n",
"            'description': description.strip(),\n",
"            'amount': amount,\n",
"            'balance': balance\n",
"        }\n",
"    return None\n",
"\n",
"def parse_Credit_Card_transaction_line(line):\n",
"    \"\"\"Parse one credit-card statement line: date, description, amount (no balance column).\"\"\"\n",
"    pattern = r'^(\\d{2}/\\d{2})\\s+(.+?)\\s+(-?[\\d,]+\\.\\d{2})$'\n",
"    match = re.match(pattern, line.strip())\n",
"\n",
"    if match:\n",
"        date, description, amount = match.groups()\n",
"        return {\n",
"            'date': date,\n",
"            'description': description.strip(),\n",
"            'amount': amount\n",
"        }\n",
"    return None\n",
"\n",
"def extract_transactions_CA_from_pdf(pdf_path):\n",
"    \"\"\"Extract all parseable bank-account transactions from the PDF at pdf_path.\"\"\"\n",
"    transactions = []\n",
"\n",
"    with pdfplumber.open(pdf_path) as pdf:\n",
"        for page in pdf.pages:\n",
"            text = page.extract_text()\n",
"            if not text:  # extract_text() returns None for image-only pages\n",
"                continue\n",
"            for line in text.split(\"\\n\"):\n",
"                parsed = parse_transaction_line(line)\n",
"                if parsed:\n",
"                    transactions.append(parsed)\n",
"    return transactions\n",
"\n",
"def extract_transactions_CreditCard_from_pdf(pdf_path):\n",
"    \"\"\"Extract all parseable credit-card transactions from the PDF at pdf_path.\"\"\"\n",
"    transactions = []\n",
"\n",
"    with pdfplumber.open(pdf_path) as pdf:\n",
"        for page in pdf.pages:\n",
"            text = page.extract_text()\n",
"            if not text:  # guard against pages with no extractable text\n",
"                continue\n",
"            for line in text.split(\"\\n\"):\n",
"                parsed = parse_Credit_Card_transaction_line(line)\n",
"                if parsed:\n",
"                    transactions.append(parsed)\n",
"    return transactions\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82c34eac-fc30-41d6-8325-77efc48d0dd8",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import os\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "769ee512-75f5-480a-9407-f9c4cd46b679",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# ---------- STEP 3: Build prompts ----------\n",
"\n",
"def build_prompts(transactions):\n",
" system_prompt = \"\"\"\n",
"You are a personal financial assistant.\n",
"Your job is to analyze bank transactions, categorize each expense into categories such as:\n",
"Food, Clothing, Rent, Utilities, Entertainment, Travel, Health, Miscellaneous, and Others.\n",
"\n",
"Your responsibilities:\n",
"\n",
"Categorize all transactions and compute total spending per category.\n",
"\n",
"Identify the top 5 categories by total spending.\n",
"\n",
"Detect high-frequency purchases, even if individual amounts are small (e.g., $4 coffee bought 40 times).\n",
"\n",
"For these, group transactions by merchant/description and count frequency.\n",
"\n",
"Highlight the top 5 frequent purchases, with both frequency and total spend.\n",
"\n",
"Provide a practical summary of spending habits, covering both biggest expenses and frequent small purchases.\n",
"\n",
"Suggest 2-3 actionable recommendations to reduce spending, targeting both:\n",
"\n",
"Big categories (e.g., Rent, Travel, Entertainment).\n",
"\n",
"Small but frequent “habit expenses” (e.g., coffee, fast food, subscriptions).\n",
"\n",
"The output should be a valid JSON object with this structure:\n",
"{\n",
" \"summary\": {\n",
" \"Food\": <amount>,\n",
" \"Clothing\": <amount>,\n",
" \"Rent\": <amount>,\n",
" \"Utilities\": <amount>,\n",
" \"Entertainment\": <amount>,\n",
" \"Travel\": <amount>,\n",
" \"Health\": <amount>,\n",
" \"Miscellaneous\": <amount>,\n",
" \"Others\": <amount>\n",
" },\n",
" \"total_expenses\": <total>,\n",
" \"top_5_categories\": [ {\"category\": <name>, \"amount\": <amount>} ],\n",
" \"top_5_frequent_purchases\": [ {\"item\": <merchant/description>, \"count\": <frequency>, \"total\": <amount>} ],\n",
" \"insights\": \"<short paragraph summary of spending, including both big categories and frequent small habits>\",\n",
" \"recommendations\": [ \"<tip1>\", \"<tip2>\", \"<tip3>\" ]\n",
"}\n",
"\n",
"\"\"\"\n",
"\n",
" user_prompt = \"Here are my bank account transactions for the past few months:\\n\\n\"\n",
" for txn in transactions:\n",
" user_prompt += f\"- Date: {txn['date']}, Description: {txn['description']}, Amount: {txn['amount']}\\n\"\n",
"\n",
" user_prompt += \"\"\"\n",
"Please analyze these transactions according to the instructions in the system prompt.\n",
"\"\"\"\n",
"\n",
" return system_prompt, user_prompt\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "307ca02b-2df6-4996-85e7-d073f74592f5",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"# ---------- STEP 4: Call OpenAI ----------\n",
"def analyze_transactions(pdf_path):\n",
" transactions = extract_transactions_CreditCard_from_pdf(pdf_path)\n",
" system_prompt, user_prompt = build_prompts(transactions)\n",
"\n",
" client = OpenAI() # assumes OPENAI_API_KEY is set in env\n",
"\n",
" response = client.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ],\n",
" response_format={\"type\": \"json_object\"} # ensures valid JSON\n",
" )\n",
"\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)\n",
"\n",
"# ---------- MAIN ----------\n",
"if __name__ == \"__main__\":\n",
" cc_pdf_file = \"cc_statement.pdf\"\n",
" # To Debug in case of failures\n",
" # transactions = extract_transactions_from_pdf(pdf_file)\n",
" # print(cc_transactions,len(cc_transactions))\n",
" # system_prompt, user_prompt = build_prompts(cc_transactions)\n",
" # print(system_prompt, user_prompt)\n",
"\n",
" # Analyse the function to create a smart alert\n",
" cc_transactions = extract_transactions_CreditCard_from_pdf(cc_pdf_file)\n",
" analysis = analyze_transactions(cc_pdf_file)\n",
" print(\"=========================================\")\n",
" print(\"=== Top 5 Spending Habits & Insights ====\")\n",
" print(\"=========================================\")\n",
" print(json.dumps(analysis, indent=2))\n",
" print(\"=========================================\")\n",
" print(\"=========================================\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "831922f4-5efd-4cba-9975-54767b65f6d6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,305 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "581151c0-941e-47b3-a3e0-2da65ba70087",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import pdfplumber\n",
"from io import BytesIO\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "47353a41-4b47-499e-9460-fd645345f591",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far\n"
]
}
],
"source": [
"load_dotenv()\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if not api_key:\n",
" print('No API key was found')\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"API key is found but is not in the proper format\")\n",
"else:\n",
" print(\"API key found and looks good so far\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dbfbb29a-3452-45a0-b9b3-4e329ac776fb",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "88ffe256-e46a-45e8-a616-0ac574aa7085",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are a research summarizer specialized in wireless communication systems and propagation modeling. Your task is to summarize a research thesis in no more than 1000 words. The summary must be clear, structured, and written in markdown format.\n",
"\n",
"The summary should include the following sections:\n",
"\n",
"1. **Title and Authors** Provide the full title of the thesis and author name(s).\n",
"2. **Objective / Research Problem** Clearly state the core research goal or question addressed in the thesis.\n",
"3. **Scientific and Regional Background** Explain the technical context of radio wave propagation, and why studying it in the Horn of Africa region is important.\n",
"4. **Methodology** Summarize the modeling techniques, data sources, simulation tools, frequency bands (e.g., microwave, millimeter), and measurement or evaluation methods used.\n",
"5. **Key Findings** Highlight the quantitative and qualitative results, including differences between precipitation and clear-air conditions, and observed trends across geographic locations.\n",
"6. **Conclusion** Describe the primary outcomes and how they advance understanding in wireless communications.\n",
"7. **Limitations** Point out any constraints (e.g., lack of in-situ measurement, simulation assumptions).\n",
"8. **Future Work** Suggest next steps for improving or extending this research.\n",
"9. **Real-World Applications** Discuss how the models or findings could improve wireless network planning, 5G deployment, or link budgeting in East Africa and similar regions.\n",
"\n",
"Use academic language but keep it concise, clear, and structured for a technical reader. Output in markdown format only.\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5f3f7b1a-865f-44cc-854d-9e9e7771eb82",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: ipywidgets in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (8.1.7)\n",
"Collecting pdfplumber\n",
" Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)\n",
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (9.4.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
"Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (4.0.14)\n",
"Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipywidgets) (3.0.15)\n",
"Collecting pdfminer.six==20250506 (from pdfplumber)\n",
" Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)\n",
"Requirement already satisfied: Pillow>=9.1 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfplumber) (11.3.0)\n",
"Collecting pypdfium2>=4.18.0 (from pdfplumber)\n",
" Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl.metadata (48 kB)\n",
"Requirement already satisfied: charset-normalizer>=2.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (3.4.3)\n",
"Requirement already satisfied: cryptography>=36.0.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from pdfminer.six==20250506->pdfplumber) (45.0.6)\n",
"Requirement already satisfied: cffi>=1.14 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (1.17.1)\n",
"Requirement already satisfied: pycparser in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from cffi>=1.14->cryptography>=36.0.0->pdfminer.six==20250506->pdfplumber) (2.22)\n",
"Requirement already satisfied: colorama in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
"Requirement already satisfied: decorator in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
"Requirement already satisfied: ipython-pygments-lexers in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
"Requirement already satisfied: jedi>=0.16 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
"Requirement already satisfied: matplotlib-inline in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n",
"Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.51)\n",
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
"Requirement already satisfied: stack_data in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
"Requirement already satisfied: typing_extensions>=4.6 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (4.14.1)\n",
"Requirement already satisfied: wcwidth in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.13)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.4)\n",
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.2.0)\n",
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)\n",
"Requirement already satisfied: pure_eval in c:\\users\\esku4\\anaconda3\\envs\\llms\\lib\\site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
"Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)\n",
"Downloading pdfminer_six-20250506-py3-none-any.whl (5.6 MB)\n",
" ---------------------------------------- 0.0/5.6 MB ? eta -:--:--\n",
" --------------------------------------- 5.5/5.6 MB 30.7 MB/s eta 0:00:01\n",
" ---------------------------------------- 5.6/5.6 MB 22.9 MB/s 0:00:00\n",
"Downloading pypdfium2-4.30.0-py3-none-win_amd64.whl (2.9 MB)\n",
" ---------------------------------------- 0.0/2.9 MB ? eta -:--:--\n",
" ---------------------------------------- 2.9/2.9 MB 28.0 MB/s 0:00:00\n",
"Installing collected packages: pypdfium2, pdfminer.six, pdfplumber\n",
"\n",
" ---------------------------------------- 0/3 [pypdfium2]\n",
" ---------------------------------------- 0/3 [pypdfium2]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" ------------- -------------------------- 1/3 [pdfminer.six]\n",
" -------------------------- ------------- 2/3 [pdfplumber]\n",
" ---------------------------------------- 3/3 [pdfplumber]\n",
"\n",
"Successfully installed pdfminer.six-20250506 pdfplumber-0.11.7 pypdfium2-4.30.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install ipywidgets pdfplumber"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "06dcfc1d-b106-4b9a-9346-6dd6af4a4015",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"UNIVERSITY OF KWAZULU-NATAL\n",
"Radio Wave Propagation Modeling under\n",
"Precipitation and Clear-air at Microwave\n",
"and Millimetric Bands over Wireless Links\n",
"in the Horn of Africa\n",
"Feyisa Debo Diba\n",
"February, 2017\n",
"Supervisor: Professor Thomas J. Afullo\n",
"Co-supervisor: Dr. Akintunde Ayodeji Alonge\n",
"Radio Wave Propagation Modeling under\n",
"Precipitation and Clear-air at Microwave\n",
"and Millimetric Bands over Wireless Links\n",
"in the Horn of Africa\n",
"Feyisa Debo Diba\n",
"In fulfillment of the Degree of Doctor of Philosophy in\n",
"Electronic Engineering, College of Agriculture, Engineering\n",
"and Science, University of KwaZulu-Natal, Durban\n",
"February, 2017\n",
"Supervisor:\n",
"As the candidates Supervisor, I agree/do not agree to the submission of this thesis\n",
"Professor T.J. Afullo ———————————-\n",
"Date—————————————————\n",
"Co-Supervisor:\n",
"Dr. Akintunde Ayodeji Alonge\n",
"As the candidates Co.Supervisor, I agree to the submission of this thesis\n",
"Dr. A. A. Alonge ———————————-\n",
"Date—————————————————\n",
"ii\n",
"DECLARATION 1 - PLAGIARISM\n",
"I, Feyisa Debo Diba\n"
]
}
],
"source": [
"# Cell 3: Download and extract from PDF URL\n",
"pdf_url = (\n",
" \"https://researchspace.ukzn.ac.za/server/api/core/bitstreams/\"\n",
" \"29218203-bfc8-4fcb-bc63-9afba3341910/content\"\n",
")\n",
"\n",
"response = requests.get(pdf_url)\n",
"if response.status_code != 200:\n",
" raise Exception(f\"Failed to download PDF (Status code: {response.status_code})\")\n",
"\n",
"with pdfplumber.open(BytesIO(response.content)) as pdf:\n",
" thesis_text = \"\\n\".join(page.extract_text() for page in pdf.pages if page.extract_text())\n",
"\n",
"# Optional Preview\n",
"print(thesis_text[:1000])\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "84c544db-64a0-4181-beb0-1cc72bc88466",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# Summary of the Research Thesis\n",
"\n",
"## 1. Title and Authors\n",
"**Title:** Radio Wave Propagation Modeling under Precipitation and Clear-air at Microwave and Millimetric Bands over Wireless Links in the Horn of Africa \n",
"**Author:** Feyisa Debo Diba \n",
"**Supervisors:** Professor Thomas J. Afullo, Dr. Akintunde Ayodeji Alonge \n",
"\n",
"## 2. Objective / Research Problem\n",
"The thesis investigates radio wave propagation modeling in clear air and precipitation conditions over wireless communication systems in the Horn of Africa, specifically Ethiopia. The research aims to address the attenuation problem caused by precipitation for systems operating at higher frequency bands.\n",
"\n",
"## 3. Scientific and Regional Background\n",
"The congestion of lower operating frequency bands has led to the rapid growth of utilizing higher frequency spectrum for wireless communication systems. However, the Horn of Africa, particularly Ethiopia, lacks comprehensive studies on propagation modeling under different atmospheric conditions. This research provides valuable insights for the region, contributing to the efficient operation of wireless networks.\n",
"\n",
"## 4. Methodology\n",
"The research uses three years of atmospheric data (temperature, pressure, relative humidity) from the National Meteorological Agency of Ethiopia and clear air signal measurements over terrestrial Line-of-Sight (LOS) links from EthioTelecom. Rainfall data from a Davis Vantage weather station installed at Jimma University, Ethiopia, are also used. The study applies the ITU-R model for refractivity gradient prediction and the Rice-Holmberg (R-H) model for one-minute rain rate distribution. A semi-Markovian model is used for rainfall event characterization and generation.\n",
"\n",
"## 5. Key Findings\n",
"The research derived radio climatological parameters for different rain and clear air fade models. It also proposed rainfall rate conversion factors for Ethiopian sites and developed rainfall rate and fade margin contour maps for Ethiopia. The study found that the sojourn time of spikes in every rain regime is appropriately described by Erlang-k distribution. The number of spikes of generated rainfall events and the corresponding sojourn times follow the power-law relationship.\n",
"\n",
"## 6. Conclusion\n",
"The research provides a comprehensive analysis of radio wave propagation under different atmospheric conditions in Ethiopia. The findings contribute to the understanding of the impact of atmospheric conditions on wireless communication systems operating at higher frequency bands.\n",
"\n",
"## 7. Limitations\n",
"The research is limited by the availability and quality of atmospheric and signal level data. The simulation models also have inherent assumptions that may affect the accuracy of the results.\n",
"\n",
"## 8. Future Work\n",
"Future research could focus on refining the models used in this study by incorporating more data and improving the simulation techniques. Studies could also be extended to other regions in the Horn of Africa.\n",
"\n",
"## 9. Real-World Applications\n",
"The findings of this research can improve wireless network planning and 5G deployment in East Africa. The models developed can also be used in link budgeting, which is crucial for the design and operation of wireless communication systems."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Cell 4: Summarize via OpenAI\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": f\"Here is the thesis text (truncated):\\n\\n{thesis_text[:10000]}\"}\n",
"]\n",
"\n",
"response = openai.chat.completions.create(\n",
" model=\"gpt-4\",\n",
" messages=messages,\n",
" temperature=0.3\n",
")\n",
"\n",
"summary = response.choices[0].message.content.strip()\n",
"display(Markdown(summary))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1cdf9ec-5efb-4d4b-8de2-83648865f092",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "36ef4c36-2905-4485-a46e-dead68cc2dcb",
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown\n",
"\n",
"# Step 1: Create your prompts\n",
"\n",
"load_dotenv(override=True)\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of \\\n",
" email texts and suggests short subject lines for the email based \\\n",
" on the requested tone and language. Respond in markdown.\"\n",
"user_prompt = \"\"\"\n",
" What's the hardest-working organ in your body? A lot of people will say the heart. After all, it beats 100,000 times a day. It doesn't rest. It doesn't sleep. And it keeps you alive.\n",
"\n",
"\n",
"But we wanted a second opinion. For this edition of From the Armchair, our monthly mental health newsletter, we asked our psychiatrists. Some of them had a different answer: The brain.\n",
"\n",
"\n",
"If that surprises you, think about how your body feels after a long workout—then compare it to how your brain feels after a day of meetings, emails, problem-solving, or just trying to hold it all together. That drained, foggy, overstimulated sensation? Thats cognitive fatigue. And even if your body hasnt moved much, the exhaustion is very real.\n",
"\n",
"The brains quiet hustle\n",
"\n",
"Cognitive fatigue happens when the mental load were carrying uses up more fuel than we realize. And the brain is a gas-guzzler. It makes up only 2% of our body weight but consumes about 20% of our energy—just to keep us functioning.\n",
"\n",
"Thats not just because were thinking deeply or making big decisions. Its because the brain is always on: Absorbing information, interpreting social cues, navigating ambiguity, switching between tasks, and trying to make sense of a noisy world. All of that takes effort.\n",
"\n",
"Which brings us to a fallacy: We sometimes think we havent “done anything” if we havent physically moved or checked something off a list. But the brain doesnt lift weights to get tired. Its heavy lifting is invisible.\n",
"\n",
"The myth: Motion = accomplishment\n",
"\n",
"Theres a cultural bias that equates movement with productivity. A tired body is seen as earned. A tired mind is often seen as weakness—or worse. Neuroscience disagrees.\n",
"\n",
"The truth is, mental labor—especially the constant decision-making, emotional regulation, and alertness that life demands—can be deeply taxing. Unlike a workout, there's often no clear beginning, middle, or end. Just a low-grade hum that builds over time.\n",
"\n",
"So if youve ever said, “Why am I so tired? I didnt even do anything today,” this could be your answer: Your brain has been sprinting in place.\n",
"\n",
"Mental health and the weight of thinking\n",
"\n",
"Talkiatry psychiatrists note that if youre living with a mental health condition, this load can feel even heavier. Decisions feel high stakes. Basic tasks can turn into uphill climbs. We can get overloaded with competing signals. Every day stress taxes the circuits we rely on to cope.\n",
"\n",
"While the brain is incredibly resilient, nearly every mental health condition adds friction to thought. That friction requires effort to overcome. That effort burns fuel. And that fuel runs out faster than we expect.\n",
"\n",
"Rest isnt laziness—its repair\n",
"\n",
"This isnt meant to sound hopeless. In fact, its the opposite.\n",
"\n",
" \n",
"\n",
"Recognizing that your brain works hard—even when you dont realize it—is the first step towards giving it the care it deserves. That might mean rest. It might mean therapy or medication to help you find balance. It might just mean giving yourself credit for the things no one else can see.\n",
"\n",
"\n",
"So if your mind feels tired, believe it. Youre not lazy. Youre human. And youve probably done a lot more than you think.\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"AIInputMessages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
"] # fill this in\n",
"\n",
"# Step 3: Call OpenAI\n",
"openAI = OpenAI()\n",
"\n",
"response = openAI.chat.completions.create(\n",
" model=\"gpt-4o-mini\",\n",
" messages = AIInputMessages\n",
")\n",
"\n",
"responseString = response.choices[0].message.content\n",
"# Step 4: print the result\n",
"\n",
"print(\"Printing....\")\n",
"print(responseString)\n",
"\n",
"print(\"\\n\")\n",
"print(\"Displaying....\")\n",
"display(responseString)\n",
"\n",
"print(\"\\n\")\n",
"print(\"Displaying Markdown....\")\n",
"display(Markdown(responseString))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f8cc568-428d-4ff3-988d-6a31c35db5ba",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7db973a2-c95e-4939-a0d7-b54edec4d2cf",
"metadata": {},
"source": [
"# Bitcoin Market Prediction using CoinMarketCap\n",
"An AI-powered project using historical CoinMarketCap data to predict Bitcoin price trends and offer actionable insights for traders."
]
},
{
"cell_type": "markdown",
"id": "b792b517-bbc8-4e2c-bff2-45fad1a784dc",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51523d62-825a-4a15-aec2-7c910beb5fda",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "markdown",
"id": "2e3816b0-4557-4225-bfb9-9933d813548a",
"metadata": {},
"source": [
"## .env configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02be59e7-01cc-41b5-88c3-a47860570078",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")"
]
},
{
"cell_type": "markdown",
"id": "3fc32555-ea4e-45fe-ad44-9dbf4441afd1",
"metadata": {},
"source": [
"### This line creates an authenticated OpenAI client instance, used to make API requests in your code."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0845c687-6610-4f83-89e8-fb94bc47ddd2",
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"openai = OpenAI(api_key=api_key)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d140db1a-dd72-4986-8f38-09f8d8f97b00",
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        if soup.body:  # some responses (e.g. non-HTML) have no <body>\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdc96768-94a8-4a08-acf1-32a62b699b94",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"\n",
"You are an intelligent assistant specialized in Bitcoin market prediction. Your tasks are:\n",
"\n",
"- Collect, preprocess, and analyze historical Bitcoin price and volume data sourced from CoinMarketCap historical data tables or API.\n",
"- Extract relevant time series and technical features from OHLC (open, high, low, close) and volume data.\n",
"- Use machine learning or statistical models to forecast future Bitcoin price trends.\n",
"- Output clear, concise, and actionable insights, focusing on predicted price direction and potential trading signals.\n",
"- Ensure all data collection respects CoinMarketCap's terms of service.\n",
"- Present findings in user-friendly language, explaining prediction confidence and market risks.\n",
"- Continuously improve prediction accuracy through back-testing on updated datasets.\n",
"\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d39e983-5b65-4de1-bdf0-e4239c3eb03f",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_for(website):\n",
" user_prompt = f\"You are analyzing historical Bitcoin market data from the webpage titled '{website.title}'.\\n\"\n",
" user_prompt += (\n",
" \"The data includes daily open, high, low, close prices, trading volume, \"\n",
" \"and market capitalization presented in a table format.\\n\"\n",
" \"Please provide a clear and concise analysis in Markdown format, focusing on recent trends, \"\n",
" \"price movements, volatility, and any insights that could help forecast Bitcoin price directions.\\n\"\n",
" \"If possible, include technical indicators, significant patterns, or notable market events mentioned in the data.\\n\\n\"\n",
" )\n",
" user_prompt += website.text\n",
" return user_prompt\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3d41ed3-4753-49f2-b51f-37e8be43102c",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0eb99fcf-75a2-41b8-bf53-568f94264438",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content\n",
"\n",
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(summary): \n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0e57921-5132-40c6-834b-03a11a96425c",
"metadata": {},
"outputs": [],
"source": [
"url = \"https://coinmarketcap.com/currencies/bitcoin/historical-data/3\"\n",
"summary = summarize(url)\n",
"display_summary(summary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19d9b69a-6493-402d-a0b4-a486c322c816",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,632 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# YOUR FIRST LAB\n",
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
"\n",
"## Your first Frontier LLM Project\n",
"\n",
"Let's build a useful LLM solution - in a matter of minutes.\n",
"\n",
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
"\n",
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
"\n",
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
"\n",
"## If you're new to Jupyter Lab\n",
"\n",
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
"\n",
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
"\n",
"## If you're new to the Command Line\n",
"\n",
"Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
"\n",
"## If you'd prefer to work in IDEs\n",
"\n",
"If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they both work great with these lab notebooks too. \n",
"If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
"\n",
"## If you'd like to brush up your Python\n",
"\n",
"I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
"`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
"\n",
"## I am here to help\n",
"\n",
"If you have any problems at all, please do reach out. \n",
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
"And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
"\n",
"## More troubleshooting\n",
"\n",
"Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
"\n",
"## For foundational technical knowledge (eg Git, APIs, debugging) \n",
"\n",
"If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
"\n",
"This covers Git and GitHub; what they are, the difference, and how to use them: \n",
"https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
"\n",
"This covers technical foundations: \n",
"ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
"https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
"\n",
"This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
"https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
"\n",
"This covers the essential techniques for figuring out errors: \n",
"https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
"\n",
"And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
"\n",
"## If this is old hat!\n",
"\n",
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "markdown",
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
"metadata": {},
"source": [
"# Connecting to OpenAI (or Ollama)\n",
"\n",
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
"\n",
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
"\n",
"## Troubleshooting if you have problems:\n",
"\n",
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
"\n",
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
"\n",
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
"\n",
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Let's make a quick call to a Frontier model to get started, as a preview!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## OK onwards with our first project"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "markdown",
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
"metadata": {},
"source": [
"## Types of prompts\n",
"\n",
"You may know this already - but if not, you will get very familiar with it!\n",
"\n",
"Models like GPT4o have been trained to receive instructions in a particular way.\n",
"\n",
"They expect to receive:\n",
"\n",
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
"\n",
"**A user prompt** -- the conversation starter that they should reply to"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
    "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcd09f4b-a2c4-4274-acec-b3b7c0ac883a",
"metadata": {},
"outputs": [],
"source": [
"system_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "markdown",
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
"metadata": {},
"source": [
"## Messages\n",
"\n",
"The API from OpenAI expects to receive messages in a particular structure.\n",
"Many of the other APIs share this structure:\n",
"\n",
"```python\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
"]\n",
"```\n",
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
"metadata": {},
"outputs": [],
"source": [
"# To give you a preview -- calling OpenAI with system and user messages:\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
"metadata": {},
"source": [
"## And now let's build useful messages for GPT-4o-mini, using a function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
"metadata": {},
"outputs": [],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(ed)"
]
},
{
"cell_type": "markdown",
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
"metadata": {},
"source": [
"## Time to bring it together - the API for OpenAI is very simple!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "markdown",
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
"metadata": {},
"source": [
"# Let's try more websites\n",
"\n",
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
"\n",
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
"\n",
    "Also, websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
"\n",
"But many websites will work just fine!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://anthropic.com\")"
]
},
{
"cell_type": "markdown",
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business applications</h2>\n",
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
"\n",
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
    "system_prompt = \"You are an assistant that analyzes the content of an email and suggests an appropriate subject line for the email\"\n",
"user_prompt = \"\"\"\n",
" Read the email below and suggest a short subject line for the email\n",
" Dear John\n",
"I hope this email finds you well.\n",
"I am writing to request a meeting to discuss the timeline for Project X. I believe a discussion would be beneficial to ensure alignment on deliverables and milestones.\n",
"Would you be available for a 30-minute meeting on Thursday, August 14th at 2:00 PM PDT, or Friday, August 15th at 10:00 AM PDT? Please let me know if either of these times work for you, or feel free to suggest an alternative time that better suits your schedule.\n",
"Thank you for your time and consideration. I look forward to hearing from you soon.\n",
"Best regards,\n",
"Scott\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [ {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}] # fill this in\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
"metadata": {},
"source": [
"## An extra exercise for those who enjoy web scraping\n",
"\n",
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
]
},
{
"cell_type": "markdown",
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
"metadata": {},
"source": [
"# Sharing your code\n",
"\n",
    "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like to add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
"\n",
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
"\n",
"Here are good instructions courtesy of an AI friend: \n",
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
"system_prompt = \"Eres un analista acostumbrado a trabajar con correos electrónicos que contiene un gran conocimiento sobre la mejor manera de resumir contenido releveante \\\n",
"dejando de lado cualquier información que no despierte interés o no sea el tema principal del correo. Tu función será leer contenido de correos y definir un listado de las 3 mejores opciones con el formato: Opción *numero de la opción*: *sujeto* Motivo: *que palabras clave dentro del texto has utilizado para llegar a esa conclusion y la relación semántica con tu idea\"\n",
"user_prompt = \"\"\"\n",
"Tengo un correo que le quiero enviar a mi profesor pero no se muy bien como llamarlo, ayudame. El correo es el siguiente:\n",
"Hola profe,\n",
"Ultimamente estoy disfrutando mucho sus clases y la información que presenta me parece muy importante. Este fin de semana me voy de vacaciones y no podré\n",
"ir a sus clases la semana que viene. Me gustaría si pudiera pasarme los pdfs de la siguiente semana para echarle un vistazo por mi cuenta durante mi ausencia en Francia.\n",
"\n",
"Un saludo,\n",
"Daniel.\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [{\"role\" : \"system\" , \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}]\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response = openai.chat.completions.create( \n",
" model = \"gpt-4o-mini\",\n",
" messages = messages)\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,278 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"id": "5cbb8ddf-bc86-4da0-96eb-b4971b9bf3a3",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from bs4 import BeautifulSoup\n",
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"load_dotenv()\n",
"import requests\n",
"from IPython.display import Markdown"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "f76bea4c-95cf-47ae-9236-75e866320470",
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" def __init__(self,url):\n",
" self.url = url\n",
"        response = requests.get(self.url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" self.text = soup.get_text(separator=\" \").lower() "
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "d0ed56fb-af44-42ad-9235-c588ca40edc8",
"metadata": {},
"outputs": [],
"source": [
"job_search = Website(\"https://www.google.com/about/careers/applications/jobs\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "38468bd0-5d95-4944-b371-107300495ebf",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are a job search assistant who finds real-time DevOps-related job listings from career pages, job boards, and developer platforms. Return results with job title, company name, and a link to the listing. Focus on DevOps, SRE, Platform Engineering, and CI/CD tooling roles.\"\n",
"user_prompt = f\"\"\"\n",
"Here is a list of job postings:\n",
"\n",
"{job_search.text}\n",
"\n",
"Please extract only the jobs that are clearly related to:\n",
"- DevOps\n",
"- Site Reliability Engineering (SRE)\n",
"- Platform Engineering\n",
"- CI/CD or Infrastructure\n",
"\n",
"Exclude roles like sales, instructors, analysts, and anything not related to DevOps tools.\n",
"\n",
"For each DevOps-related job, return:\n",
"- Job Title\n",
"- Company\n",
"- Location\n",
"- Years of Experience\n",
"- Skill set required\n",
"- (if available) Whether it's remote\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "927af617-0d87-48de-ac0a-751900b4a495",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" { \"role\": \"system\", \"content\": system_prompt },\n",
" { \"role\": \"user\", \"content\": user_prompt }\n",
"]\n",
"\n",
"openai = OpenAI(\n",
" api_key=os.getenv('GROQ_API_KEY'),\n",
" base_url=\"https://api.groq.com/openai/v1\" \n",
" )\n",
"response = openai.chat.completions.create(\n",
" model = \"gemma2-9b-it\",\n",
" messages = messages\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "2ded75af-ba51-4e21-a581-d8da82439e2e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Here are the DevOps-related jobs from your provided Google Careers listing:\n",
"\n",
"**1. Technical Lead Manager, Google Notifications** \n",
"\n",
"* **Company:** Google \n",
"* **Location:** Tel Aviv, Israel\n",
"* **Years of Experience:** 8+ years\n",
"* **Skillset:** 8+ years of software development experience, 5+ years of experience testing, launching software products, 3+ years of experience with software design and architecture, experience in a leadership role or people management.\n",
"\n",
" **2. Senior System Software Engineer, Embedded Systems, Firmware, Pixel**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** New Taipei, Banqiao District, New Taipei City, Taiwan\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** 5+ years of experience with software development in C, Android BSP and Linux drivers, 3+ years of experience testing, maintaining, or launching software products, 1+ years of experience with software design and architecture, 3+ years of experience working with embedded operating systems.\n",
"\n",
" **3. Senior Technical Program Manager, Infrastructure Deployment Software**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** Sunnyvale, CA, USA\n",
"* **Years of Experience:** 8+ years\n",
"* **Skillset:** Bachelors Degree in a relevant field, 8+ years of program management experience, 8+ years of infrastructure experience.\n",
"\n",
"**4. Network Engineer**\n",
"\n",
"* **Company:** Google\n",
"* **Location:** Bengaluru, Karnataka, India\n",
"* **Years of Experience:** 3+ years\n",
"* **Skillset:** Bachelors Degree in Computer Science, Engineering, a related field, or equivalent practical experience; 3+ years of experience with network routing protocols, design and troubleshooting, with network equipment providers; Experience with network operations at scale; broad understanding of cloud computing, Linux server environments, network design and deployment, managing large scale infrastructure; experience working in a customer-facing role at an ISP, carrier, content network, or CDN; experience programming in either Python or Go.\n",
"\n",
"**5. Field Activation Lead, Google Cloud**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** Warsaw, Poland\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** Bachelor's degree or equivalent practical experience; 5+ years of experience in program or project management in an enterprise, cloud, or technology environment; experience working with executive-level clients or stakeholders.\n",
"\n",
"**6. Software Engineer III, Infrastructure, Google Cloud Business Platforms**\n",
"* **Company:** Google \n",
"* **Location:** Cambridge, MA, USA\n",
"* **Years of Experience:** 2+ years \n",
"* **Skillset:** Bachelors degree or equivalent practical experience; 2 years of experience with software development in one or more programming languages (C, C++, Python, or Go), or 1 year of experience with an advanced degree; 2 years of experience with developing large-scale infrastructure, distributed systems or networks, or experience with compute technologies, storage or hardware architecture.\n",
"\n",
" **7. Networking Performance Modeling Architect**\n",
"\n",
"* **Company:** Google\n",
"* **Location:** Tel Aviv, Israel; Haifa, Israel\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** Bachelors degree in Electrical engineering, computer engineering, Computer Science, or equivalent practical experience; 5 years of experience with software development in C++, and data structures/algorithms; experience in performance modeling, performance analysis, and workload characterization.\n",
"\n",
"\n",
"\n",
"Let me know if you'd like me to search for more specific roles or on other platforms!\n",
"\n"
]
}
],
"source": [
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "6b8bd531-c537-4792-a450-8c06e035172d",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Here are the DevOps-related jobs from your provided Google Careers listing:\n",
"\n",
"**1. Technical Lead Manager, Google Notifications** \n",
"\n",
"* **Company:** Google \n",
"* **Location:** Tel Aviv, Israel\n",
"* **Years of Experience:** 8+ years\n",
"* **Skillset:** 8+ years of software development experience, 5+ years of experience testing, launching software products, 3+ years of experience with software design and architecture, experience in a leadership role or people management.\n",
"\n",
" **2. Senior System Software Engineer, Embedded Systems, Firmware, Pixel**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** New Taipei, Banqiao District, New Taipei City, Taiwan\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** 5+ years of experience with software development in C, Android BSP and Linux drivers, 3+ years of experience testing, maintaining, or launching software products, 1+ years of experience with software design and architecture, 3+ years of experience working with embedded operating systems.\n",
"\n",
" **3. Senior Technical Program Manager, Infrastructure Deployment Software**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** Sunnyvale, CA, USA\n",
"* **Years of Experience:** 8+ years\n",
"* **Skillset:** Bachelors Degree in a relevant field, 8+ years of program management experience, 8+ years of infrastructure experience.\n",
"\n",
"**4. Network Engineer**\n",
"\n",
"* **Company:** Google\n",
"* **Location:** Bengaluru, Karnataka, India\n",
"* **Years of Experience:** 3+ years\n",
"* **Skillset:** Bachelors Degree in Computer Science, Engineering, a related field, or equivalent practical experience; 3+ years of experience with network routing protocols, design and troubleshooting, with network equipment providers; Experience with network operations at scale; broad understanding of cloud computing, Linux server environments, network design and deployment, managing large scale infrastructure; experience working in a customer-facing role at an ISP, carrier, content network, or CDN; experience programming in either Python or Go.\n",
"\n",
"**5. Field Activation Lead, Google Cloud**\n",
"\n",
"* **Company:** Google \n",
"* **Location:** Warsaw, Poland\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** Bachelor's degree or equivalent practical experience; 5+ years of experience in program or project management in an enterprise, cloud, or technology environment; experience working with executive-level clients or stakeholders.\n",
"\n",
"**6. Software Engineer III, Infrastructure, Google Cloud Business Platforms**\n",
"* **Company:** Google \n",
"* **Location:** Cambridge, MA, USA\n",
"* **Years of Experience:** 2+ years \n",
"* **Skillset:** Bachelors degree or equivalent practical experience; 2 years of experience with software development in one or more programming languages (C, C++, Python, or Go), or 1 year of experience with an advanced degree; 2 years of experience with developing large-scale infrastructure, distributed systems or networks, or experience with compute technologies, storage or hardware architecture.\n",
"\n",
" **7. Networking Performance Modeling Architect**\n",
"\n",
"* **Company:** Google\n",
"* **Location:** Tel Aviv, Israel; Haifa, Israel\n",
"* **Years of Experience:** 5+ years\n",
"* **Skillset:** Bachelors degree in Electrical engineering, computer engineering, Computer Science, or equivalent practical experience; 5 years of experience with software development in C++, and data structures/algorithms; experience in performance modeling, performance analysis, and workload characterization.\n",
"\n",
"\n",
"\n",
"Let me know if you'd like me to search for more specific roles or on other platforms!\n"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(Markdown(response.choices[0].message.content))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6bd6d14-e0df-45be-99e2-55aa4d96f53b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,202 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5c527a13-459e-4a46-b00e-f2c5056de155",
"metadata": {},
"source": [
"# Research Paper Summarizer with Text Highlighting"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "861a0be5-6da7-4f66-8f82-bc083a913f9f",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "74bf6765-53b6-457b-ac2d-0d1afa7fbf8f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"API key found and looks good so far!\n"
]
}
],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "227ed7af-d539-4c87-988b-80e6e049c863",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dcaadf8b-456d-48ca-af9d-9f57d3414308",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
" \"\"\"\n",
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6315093f-be68-408e-a5e1-6a2e4ea675e8",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at an article website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"I'm also looking for complete statements containing the following keywords (if found): \\\n",
"'large circuit model', 'ChipGPT' \\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt\n",
"\n",
"\n",
"article = Website(\"https://arxiv.org/html/2401.12224v1\")\n",
"# print(user_prompt_for(article))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ff8a4112-f118-4866-b6cf-82675de0a38d",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of a scientific \\\n",
"article for a PhD student (who has to read a lot of papers and journals). The \\\n",
"user will provide the article website and keyword(s) they are looking to learn and \\\n",
"cite from. Your job is to summarize the paper and point out all the statements \\\n",
"containing the specific keyword(s) the user typed. \\\n",
"Respond in markdown.\"\n",
"\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]\n",
"\n",
" \n",
"#messages_for(article)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b5e47bea-403d-48c3-ab9d-4d6adef83241",
"metadata": {},
"outputs": [],
"source": [
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content\n",
"\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f6ac1bc-5bc8-4daa-8174-d201400e517a",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://arxiv.org/html/2401.12224v1\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": [
"def generate_review_summary(reviews_text):\n",
"    # Build the prompts inside the function so reviews_text is in scope here;\n",
"    # the previous module-level f-string referenced reviews_text before it was defined.\n",
"    system_prompt = f\"\"\"\n",
"    You are an assistant that will analyze the ratings & reviews from :\\n\\n{reviews_text}\\n\\n and come up with a summary of how many 5,4,3,2,1 star ratings the restaurant has. \n",
"    You will also come up with a summary of the reviews showing what the customers love about the restaurant and what they don't like. Also extract the name of the restaurant,\n",
"    the location and the cuisine. Respond in markdown\"\"\"\n",
"    user_prompt = \"This is the summary for the restaurant: \"\n",
"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": system_prompt},\n",
"        {\"role\": \"user\", \"content\": user_prompt}\n",
"    ]\n",
"\n",
"    response = openai.chat.completions.create(\n",
"        model = \"gpt-4o-mini\",\n",
"        messages = messages\n",
"    )\n",
"    return response.choices[0].message.content\n",
"\n",
"try:\n",
" with open('restaurant_reviews.txt', 'r') as file:\n",
" reviews_text = file.read()\n",
" \n",
" # Generate review summary\n",
" summary = generate_review_summary(reviews_text)\n",
" display(Markdown(summary))\n",
"\n",
"except FileNotFoundError:\n",
" print(\"The specified reviews file was not found. Please ensure 'restaurant_reviews.txt' is in the correct directory.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3eccbf35-0a0b-4a1b-b493-aa5c342109cc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,260 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2588fbba",
"metadata": {},
"source": [
"# Website Analysis and Summarization with Selenium and OpenAI\n",
"\n",
"> This notebook demonstrates how to extract and summarize the main content of any website using Selenium for dynamic extraction and OpenAI for generating concise summaries in Mexican Spanish.\n",
"\n",
"## Overview\n",
"This notebook provides a workflow to automatically analyze websites, extract relevant text, and generate a short summary using a language model. Navigation elements are ignored, focusing on news, announcements, and main content.\n",
"\n",
"## Features\n",
"- Extracts relevant text from web pages using Selenium and BeautifulSoup.\n",
"- Generates automatic summaries using OpenAI's language models.\n",
"- Presents results in markdown format.\n",
"\n",
"## Requirements\n",
"- Python 3.8+\n",
"- Google Chrome browser installed\n",
"- The following Python packages:\n",
" - selenium\n",
" - webdriver-manager\n",
" - beautifulsoup4\n",
" - openai\n",
" - python-dotenv\n",
" - requests\n",
"- An OpenAI API key (project key, starting with `sk-proj-`)\n",
"- Internet connection\n",
"\n",
"## How to Use\n",
"1. Install the required packages:\n",
" ```bash\n",
" pip install selenium webdriver-manager undetected-chromedriver beautifulsoup4 openai python-dotenv requests\n",
" ```\n",
"2. Add your OpenAI API key to a `.env` file as `OPENAI_API_KEY`.\n",
"3. Run the notebook cells in order. You can change the target website URL in the code to analyze different sites.\n",
"4. The summary will be displayed in markdown format below the code cell.\n",
"\n",
"**Note:** Some websites may block automated access. The notebook includes options to simulate a real user and avoid bot detection, but results may vary depending on the site's protections.\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc7c2ade",
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.chrome.options import Options\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"import undetected_chromedriver as uc"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2d21987",
"metadata": {},
"outputs": [],
"source": [
"# Load the environment variables from .env\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbb3a8ed",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5313aa64",
"metadata": {},
"outputs": [],
"source": [
"class Website:\n",
" def __init__(self, url, headless=True, wait_time=10):\n",
" self.url = url # Website URL to analyze\n",
" self.title = None # Title of the website\n",
" self.text = None # Extracted text from the website\n",
" \n",
" # Chrome options configuration for Selenium\n",
" options = Options()\n",
" if headless:\n",
" options.add_argument(\"--headless=new\") # Run Chrome in headless mode (no window)\n",
" options.add_argument(\"--disable-gpu\") # Disable GPU acceleration\n",
" options.add_argument(\"--no-sandbox\") # Disable Chrome sandbox (required for some environments)\n",
" options.add_argument(\"--window-size=1920,1080\") # Set window size to simulate a real user\n",
" # Simulate a real user-agent to avoid bot detection\n",
" options.add_argument(\"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\")\n",
" \n",
" # Initialize Chrome WebDriver\n",
" self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)\n",
" self.driver.get(url) # Open the URL in the browser\n",
" \n",
" try:\n",
" # Wait until the <body> element is present in the page\n",
" WebDriverWait(self.driver, wait_time).until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
" html = self.driver.page_source # Get the full HTML of the page\n",
" soup = BeautifulSoup(html, 'html.parser') # Parse HTML with BeautifulSoup\n",
" self.title = soup.title.string if soup.title else 'No title found' # Extract the title\n",
" if soup.body:\n",
" # Remove irrelevant elements from the body\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" # Extract clean text from the body\n",
" self.text = soup.body.get_text(separator='\\n', strip=True)\n",
" else:\n",
" self.text = \"No body found\" # If no body is found, indicate it\n",
" except Exception as e:\n",
" print(f\"Error accessing the site: {e}\") # Print error to console\n",
" self.text = \"Error accessing the site\" # Store error in the attribute\n",
" finally:\n",
" self.driver.quit() # Always close the browser, whether or not an error occurred"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e902c6b2",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown in Mexican Spanish.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eaee8f36",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ac4ed8b",
"metadata": {},
"outputs": [],
"source": [
"# Creates messages for the OpenAI API\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1536d537",
"metadata": {},
"outputs": [],
"source": [
"# Creates a summary for the given URL\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe135339",
"metadata": {},
"outputs": [],
"source": [
"# Shows the summary for the given URL\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a301ab4e",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://openai.com/\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,817 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# YOUR FIRST LAB\n",
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
"\n",
"## Your first Frontier LLM Project\n",
"\n",
"Let's build a useful LLM solution - in a matter of minutes.\n",
"\n",
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
"\n",
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
"\n",
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
"\n",
"## If you're new to Jupyter Lab\n",
"\n",
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
"\n",
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
"\n",
"## If you're new to the Command Line\n",
"\n",
"Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
"\n",
"## If you'd prefer to work in IDEs\n",
"\n",
"If you're more comfortable in IDEs like VSCode, Cursor or PyCharm, they all work great with these lab notebooks too. \n",
"If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
"\n",
"## If you'd like to brush up your Python\n",
"\n",
"I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
"`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
"\n",
"## I am here to help\n",
"\n",
"If you have any problems at all, please do reach out. \n",
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
"And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
"\n",
"## More troubleshooting\n",
"\n",
"Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
"\n",
"## For foundational technical knowledge (eg Git, APIs, debugging) \n",
"\n",
"If you're relatively new to programming -- I've got your back! While it's ideal to have some programming experience for this course, there's only one mandatory prerequisite: plenty of patience. 😁 I've put together a set of self-study guides that cover Git and GitHub, APIs and endpoints, beginner python and more.\n",
"\n",
"This covers Git and GitHub; what they are, the difference, and how to use them: \n",
"https://github.com/ed-donner/agents/blob/main/guides/03_git_and_github.ipynb\n",
"\n",
"This covers technical foundations: \n",
"ChatGPT vs API; taking screenshots; Environment Variables; Networking basics; APIs and endpoints: \n",
"https://github.com/ed-donner/agents/blob/main/guides/04_technical_foundations.ipynb\n",
"\n",
"This covers Python for beginners, and making sure that a `NameError` never trips you up: \n",
"https://github.com/ed-donner/agents/blob/main/guides/06_python_foundations.ipynb\n",
"\n",
"This covers the essential techniques for figuring out errors: \n",
"https://github.com/ed-donner/agents/blob/main/guides/08_debugging.ipynb\n",
"\n",
"And you'll find other useful guides in the same folder in GitHub. Some information applies to my other Udemy course (eg Async Python) but most of it is very relevant for LLM engineering.\n",
"\n",
"## If this is old hat!\n",
"\n",
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
"\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>\n",
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "markdown",
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
"metadata": {},
"source": [
"# Connecting to OpenAI (or Ollama)\n",
"\n",
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
"\n",
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
"\n",
"## Troubleshooting if you have problems:\n",
"\n",
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
"\n",
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
"\n",
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
"\n",
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()\n",
"\n",
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Let's make a quick call to a Frontier model to get started, as a preview!"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hello! Its great to hear from you! How can I help you today?\n"
]
}
],
"source": [
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
"\n",
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## OK onwards with our first project"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library.\n",
"        Sets self.url, self.title and self.text (empty string if the page has no <body>).\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        if soup.body:\n",
"            # Remove elements that carry no readable text before extracting\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            # Some pages (e.g. 403 error responses) have no <body> - avoid crashing on None\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Home - Edward Donner\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Well, hi there.\n",
"Im Ed. I like writing code and experimenting with LLMs, and hopefully youre here because you do too. I also enjoy DJing (but Im badly out of practice), amateur electronic music production (\n",
"very\n",
"amateur) and losing myself in\n",
"Hacker News\n",
", nodding my head sagely to things I only half understand.\n",
"Im the co-founder and CTO of\n",
"Nebula.io\n",
". Were applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Im previously the founder and CEO of AI startup untapt,\n",
"acquired in 2021\n",
".\n",
"We work with groundbreaking, proprietary LLMs verticalized for talent, weve\n",
"patented\n",
"our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
"Connect\n",
"with me for more!\n",
"May 28, 2025\n",
"Connecting my courses become an LLM expert and leader\n",
"May 18, 2025\n",
"2025 AI Executive Briefing\n",
"April 21, 2025\n",
"The Complete Agentic AI Engineering Course\n",
"January 23, 2025\n",
"LLM Workshop Hands-on with Agents resources\n",
"Navigation\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Get in touch\n",
"ed [at] edwarddonner [dot] com\n",
"www.edwarddonner.com\n",
"Follow me\n",
"LinkedIn\n",
"Twitter\n",
"Facebook\n",
"Subscribe to newsletter\n",
"Type your email…\n",
"Subscribe\n"
]
}
],
"source": [
"# Let's try one out. Change the website and add print statements to follow along.\n",
"\n",
"ed = Website(\"https://edwarddonner.com\")\n",
"print(ed.title)\n",
"print(ed.text)"
]
},
{
"cell_type": "markdown",
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
"metadata": {},
"source": [
"## Types of prompts\n",
"\n",
"You may know this already - but if not, you will get very familiar with it!\n",
"\n",
"Models like GPT4o have been trained to receive instructions in a particular way.\n",
"\n",
"They expect to receive:\n",
"\n",
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
"\n",
"**A user prompt** -- the conversation starter that they should reply to"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You are looking at a website titled Home - Edward Donner\n",
"The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n",
"\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Well, hi there.\n",
"Im Ed. I like writing code and experimenting with LLMs, and hopefully youre here because you do too. I also enjoy DJing (but Im badly out of practice), amateur electronic music production (\n",
"very\n",
"amateur) and losing myself in\n",
"Hacker News\n",
", nodding my head sagely to things I only half understand.\n",
"Im the co-founder and CTO of\n",
"Nebula.io\n",
". Were applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Im previously the founder and CEO of AI startup untapt,\n",
"acquired in 2021\n",
".\n",
"We work with groundbreaking, proprietary LLMs verticalized for talent, weve\n",
"patented\n",
"our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
"Connect\n",
"with me for more!\n",
"May 28, 2025\n",
"Connecting my courses become an LLM expert and leader\n",
"May 18, 2025\n",
"2025 AI Executive Briefing\n",
"April 21, 2025\n",
"The Complete Agentic AI Engineering Course\n",
"January 23, 2025\n",
"LLM Workshop Hands-on with Agents resources\n",
"Navigation\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Get in touch\n",
"ed [at] edwarddonner [dot] com\n",
"www.edwarddonner.com\n",
"Follow me\n",
"LinkedIn\n",
"Twitter\n",
"Facebook\n",
"Subscribe to newsletter\n",
"Type your email…\n",
"Subscribe\n"
]
}
],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "markdown",
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
"metadata": {},
"source": [
"## Messages\n",
"\n",
"The API from OpenAI expects to receive messages in a particular structure.\n",
"Many of the other APIs share this structure:\n",
"\n",
"```python\n",
"[\n",
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
"]\n",
"```\n",
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"},\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Oh, youre going for the big math questions now, huh? Well, if you insist on dragging me into elementary school territory, the answer is 4. Shocking, I know.\n"
]
}
],
"source": [
"# To give you a preview -- calling OpenAI with system and user messages:\n",
"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
"metadata": {},
"source": [
"## And now let's build useful messages for GPT-4o-mini, using a function"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# See how this function creates exactly the format above\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'role': 'system',\n",
" 'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},\n",
" {'role': 'user',\n",
" 'content': 'You are looking at a website titled Home - Edward Donner\\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\\n\\nHome\\nConnect Four\\nOutsmart\\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\\nAbout\\nPosts\\nWell, hi there.\\nIm Ed. I like writing code and experimenting with LLMs, and hopefully youre here because you do too. I also enjoy DJing (but Im badly out of practice), amateur electronic music production (\\nvery\\namateur) and losing myself in\\nHacker News\\n, nodding my head sagely to things I only half understand.\\nIm the co-founder and CTO of\\nNebula.io\\n. Were applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Im previously the founder and CEO of AI startup untapt,\\nacquired in 2021\\n.\\nWe work with groundbreaking, proprietary LLMs verticalized for talent, weve\\npatented\\nour matching model, and our award-winning platform has happy customers and tons of press coverage.\\nConnect\\nwith me for more!\\nMay 28, 2025\\nConnecting my courses become an LLM expert and leader\\nMay 18, 2025\\n2025 AI Executive Briefing\\nApril 21, 2025\\nThe Complete Agentic AI Engineering Course\\nJanuary 23, 2025\\nLLM Workshop Hands-on with Agents resources\\nNavigation\\nHome\\nConnect Four\\nOutsmart\\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\\nAbout\\nPosts\\nGet in touch\\ned [at] edwarddonner [dot] com\\nwww.edwarddonner.com\\nFollow me\\nLinkedIn\\nTwitter\\nFacebook\\nSubscribe to newsletter\\nType your email…\\nSubscribe'}]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Try this out, and then try for a few more websites\n",
"\n",
"messages_for(ed)"
]
},
{
"cell_type": "markdown",
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
"metadata": {},
"source": [
"## Time to bring it together - the API for OpenAI is very simple!"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# And now: call the OpenAI API. You will get very familiar with this!\n",
"\n",
"def summarize(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages_for(website)\n",
" )\n",
" return response.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'# Summary of Edward Donner\\'s Website\\n\\nThe website is the personal platform of Ed Donner, a software engineer and co-founder/CTO of Nebula.io, an AI-focused company that aims to help individuals discover their potential through technology. Ed expresses his passion for coding, experimenting with large language models (LLMs), and interests in DJing and electronic music production.\\n\\n## Key Sections:\\n- **About Ed**: Provides personal background, detailing his experience in AI startups, including his previous venture, untapt, which was acquired in 2021. He highlights the use of patented matching models and LLMs in talent management.\\n- **Connect Four & Outsmart**: Features interactive games or platforms where LLMs engage in diplomatic and strategic challenges.\\n- **Courses & Announcements**:\\n - **May 28, 2025**: Announced a course focused on becoming an LLM expert and leader.\\n - **May 18, 2025**: Announcement for the 2025 AI Executive Briefing.\\n - **April 21, 2025**: Introduction of \"The Complete Agentic AI Engineering Course.\"\\n - **January 23, 2025**: A workshop providing hands-on experience with agents and associated resources.\\n\\nThe website also encourages visitors to connect and engage through various social media platforms and a newsletter subscription.'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"summarize(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# Summary of \"Home - Edward Donner\"\n",
"\n",
"The website is dedicated to Edward Donner, a co-founder and CTO of Nebula.io, a platform focused on leveraging AI to assist individuals in discovering their potential and engaging with talent. Edward expresses an interest in coding, experiments with large language models (LLMs), DJing, and electronic music production. He has a history as the founder of an AI startup, untapt, which was acquired in 2021.\n",
"\n",
"## Key Features:\n",
"- **Connect Four**: A game involving LLMs competing in diplomacy and strategy.\n",
"- **About**: Information about Edward's professional background and interests.\n",
"- **Courses and Workshops**: \n",
" - **Recent Announcements**:\n",
" - **May 28, 2025**: Launch of a program to become an LLM expert and leader.\n",
" - **May 18, 2025**: Announcement of a 2025 AI Executive Briefing.\n",
" - **April 21, 2025**: Introduction of the Complete Agentic AI Engineering Course.\n",
" - **January 23, 2025**: A hands-on LLM Workshop focusing on resources related to agents.\n",
"\n",
"The content emphasizes his passion for AI and education within the industry."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_summary(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "markdown",
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
"metadata": {},
"source": [
"# Let's try more websites\n",
"\n",
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
"\n",
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
"\n",
"Also, websites protected with CloudFront (and similar) may give 403 errors - many thanks to Andy J for pointing this out.\n",
"\n",
"But many websites will work just fine!"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# CNN Website Summary\n",
"\n",
"CNN is a major news outlet that provides comprehensive coverage of world events, featuring sections on U.S. and international news, political analysis, business, health, entertainment, and sports. The site emphasizes real-time updates and includes various multimedia formats such as videos and articles.\n",
"\n",
"### Notable Articles and Updates\n",
"\n",
"- **Ukraine-Russia War**: Pro-Ukraine protests are ongoing, and there are discussions regarding dignitaries meeting on U.S. soil amidst rising tensions.\n",
" \n",
"- **Israel-Hamas Conflict**: Analysis and reports highlight significant developments including Israels settlement plans which may impact the future of a Palestinian state.\n",
"\n",
"- **Health**: New heart health guidelines suggest going alcohol-free; studies indicate a high level of stress among teenagers.\n",
"\n",
"- **Entertainment**:\n",
" - Megadeth is set to release its final album and embark on a farewell tour.\n",
" - Taylor Swift's recent appearances are noted for cultural impact.\n",
"\n",
"- **Science**: Climate-related findings unveil vulnerabilities in GPS and satellites due to pollution.\n",
"\n",
"- **Business**: Discussions are ongoing about potential government stake in Intel, affecting stock prices.\n",
"\n",
"### Additional Features\n",
"CNN also offers a variety of interactive content including quizzes, games, and newsletters tailored to reader interests. The site encourages user engagement through feedback on advertisements and technical issues.\n",
"\n",
"Overall, CNN remains a significant source for breaking news and in-depth analysis across a broad spectrum of topics."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display_summary(\"https://cnn.com\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
"metadata": {},
"outputs": [],
"source": [
"display_summary(\"https://anthropic.com\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Subject: Leave Notification: Medical Emergency (Aug 17-21, 2025)\n"
]
}
],
"source": [
"# Step 1: Create your prompts\n",
"\n",
"system_prompt = \"you are the email assistant, which provide the subject of the email\"\n",
"user_prompt = \"\"\"\n",
" please provide the appropriate subject for below email\n",
"hi team,\n",
"due to some medical emergency , i will be on leave for 5 days starting\n",
"from 17-08-2025 to 21-08-2025.\n",
"\n",
"please call me in case of any urgency.\n",
"\n",
"regards\n",
"Rahul\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
"]\n",
"\n",
"# Step 3: Call OpenAI\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
"metadata": {},
"source": [
"## An extra exercise for those who enjoy web scraping\n",
"\n",
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
]
},
{
"cell_type": "markdown",
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
"metadata": {},
"source": [
"# Sharing your code\n",
"\n",
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like to add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
"\n",
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clear Outputs of All Cells, and then Save) for clean notebooks.\n",
"\n",
"Here are good instructions courtesy of an AI friend: \n",
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,211 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d955d75d-4970-48fe-983e-a2a850cecfc5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"import PyPDF2\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.options import Options\n",
"from selenium.webdriver.chrome.service import Service\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"from bs4 import BeautifulSoup\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e1e5dd3-f91a-466b-8fd4-2dbf4eedf101",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override = True)\n",
"api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"\n",
"if not api_key:\n",
" print(\"No API key\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"API key doesn't look correct, check it\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"It looks like API key has an extra space - check it\")\n",
"else:\n",
" print(\"API key looks good, moving on!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67a6e583-1ef7-4b77-8886-c0e8c619933c",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34a07806-dd68-4a86-8b6e-e1b2aaf0daa1",
"metadata": {},
"outputs": [],
"source": [
"# path to the CV\n",
"path = \"/Users/yanasklar/Documents/For applying/CV/СV_YanaSklyar_c.pdf\"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Vacancy:\n",
" def __init__(self, url, instructions = \"\"):\n",
" self.url = url\n",
" \n",
" # configure Chrome settings\n",
" options = Options()\n",
" # options.add_argument(\"--headless\") \n",
" \"\"\"\n",
" Headless mode runs the browser in the background (invisible).\n",
" However, some websites (like openai.com) block headless browsers.\n",
" So if this line is active, the page may not load correctly and you may not get the full content.\n",
" \"\"\"\n",
" options.add_argument(\"--disable-gpu\")\n",
" options.add_argument(\"--no-sandbox\")\n",
" options.add_argument(\"--window-size=1920x1080\")\n",
"\n",
" # use webdriver-manager to manage ChromeDriver\n",
" service = Service(ChromeDriverManager().install())\n",
" driver = webdriver.Chrome(service=service, options=options)\n",
" driver.get(url)\n",
" time.sleep(3) # let the page load\n",
"\n",
" # take the source of the page\n",
" page_source = driver.page_source\n",
" driver.quit()\n",
"\n",
" # analyse with BeautifulSoup\n",
" soup = BeautifulSoup(page_source, 'html.parser')\n",
"\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"img\", \"script\", \"style\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator='\\n', strip=True)\n",
"\n",
" # read CV\n",
" with open(path, 'rb') as f:\n",
" reader = PyPDF2.PdfReader(f)\n",
" cv_text = \"\"\n",
" for page in reader.pages:\n",
" text = page.extract_text()\n",
" if text:\n",
" cv_text += text + \"\\n\"\n",
" self.cv_text = cv_text\n",
"\n",
" # summarise and print the description of the job\n",
" message = f\"\"\"Here is the content of a webpage: {self.text}.\n",
" Find job description on that page,\n",
" summarise it, include the list requirements and other important details.\n",
" \"\"\"\n",
" messages = [{\"role\":\"user\", \"content\":message}]\n",
" response = openai.chat.completions.create(model='gpt-4o-mini', messages = messages)\n",
" print(\"The job description: \", response.choices[0].message.content)\n",
"\n",
" # create prompts\n",
" self.system_prompt = \"\"\"You are a career assistant specializing in writing cover letter.\n",
" Your tasks:\n",
" 1. Read the candidate's CV (provided as text).\n",
" 2. Read the job description (provided from a webpage).\n",
" 3. Write a concise and compelling cover letter, that:\n",
" - Hightlights the most relevant experience and skills from the CV,\n",
" - Aligns directly wit the requirements in the job description,\n",
" - Adapts to cultural and professional norms in Israel.\n",
" The letter should be no longer than half a page, persuasive and tailored to make the applicant stand out.\n",
" \"\"\"\n",
"\n",
" user_prompt = f\"\"\"\n",
" Here is my CV:\n",
" {self.cv_text}\n",
" \n",
" The job vacancy is from the website {self.title}.\n",
" Here is the decription of the vacancy:\n",
" {self.text}\n",
" Please write a cover letter that connects my background to this vacancy.\n",
" Make it persuasive and suitable for Israeli job market.\n",
" \"\"\"\n",
" \n",
" if instructions:\n",
" user_prompt += f\"Additional instructions: {instructions}\"\n",
" self.user_prompt = user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9160b9f5-177b-4477-8e54-3a212f275a22",
"metadata": {},
"outputs": [],
"source": [
"def cover_letter(url, instructions = \"\"):\n",
"    \"\"\"Generate and print a cover letter for the vacancy at `url`.\n",
"\n",
"    Builds a Vacancy (which scrapes the job page and reads the CV, preparing\n",
"    system/user prompts), sends those prompts to the model, and prints the\n",
"    resulting letter. `instructions` is forwarded to Vacancy as extra\n",
"    guidance appended to the user prompt.\n",
"    \"\"\"\n",
"    vacancy = Vacancy(url, instructions)\n",
"    messages = [\n",
"        {\"role\":\"system\", \"content\":vacancy.system_prompt},\n",
"        {\"role\":\"user\", \"content\":vacancy.user_prompt}\n",
"    ]\n",
"    response = openai.chat.completions.create(model='gpt-4o-mini', messages=messages)\n",
"    # Bail out early when nothing usable came back; the original fell through\n",
"    # and still dereferenced response.choices[0], crashing on the error path.\n",
"    if not response or not response.choices:\n",
"        print(\"Something went wrong: no response received from the model\")\n",
"        return\n",
"    print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1de4b55c-a8da-445f-9865-c7a8bafdbc3c",
"metadata": {},
"outputs": [],
"source": [
"a = \"https://www.linkedin.com/jobs/view/4285898438/?alternateChannel=search&eBP=CwEAAAGY3R5LOabDLOVTy6xvBcSlWyAkIXQz8IRkSM3rgsqTPtvcEvUSnq980O7oLV2Hh_ldTpc2cBBmRq1IRnLtp7TzEcUvndFEXeCuviA5yo7oFYfW7KoEp4SPNzmf3D9LtnSgk9Iudy3skk6n3hVOtyDpx8Zm0AiTWPvdwCaZ_w5Xu8lAG797NRNDco71ynm99LmCOC9Go7DdDQ2eLewamc4SOsA4xWcXy0GmZVy3kBF1AprK3ylAYR2wrm5-hp4lRpbbfUxXjkEOG6H_GbPpKtN-N8mYnMd9w_cej5qQmTFX86gqSi6HuXFtK0h46TbOS5r-YQksVd1Yb4kYZnDznWXPLbxp04xVJSPzsHoa05wQdOfZ2UUSoMTJmic3n3qfV2u9Bp8n4sLYtINpzKdvm4eADGGkN-nR3O2oPeas9XjGbBwNdjXHAcX_PJoRwlFdQ1gVkYQEF1T7qAfXUJoUt-fv4oLxGnIgV6yJuMgw&refId=9NA7Bvt%2FhCqDkFNRGu1dPA%3D%3D&trackingId=W11hvpcIjHA%2FjU%2FFZ%2B1uAA%3D%3D\"\n",
"b = \"The style of the cover letter should informal, as if i talked to a friend about my background\"\n",
"cover_letter(a, b)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0feb3cbe-686a-4a97-9ca3-a0cb32a24c5d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (llms)",
"language": "python",
"name": "llms"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,239 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "cab13efd-a1f4-4077-976e-e3912511117f",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import re\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c226f54b-325c-49b1-9d99-207a8e306682",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: youtube_transcript_api in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (1.1.1)\n",
"Requirement already satisfied: defusedxml<0.8.0,>=0.7.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (0.7.1)\n",
"Requirement already satisfied: requests in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from youtube_transcript_api) (2.32.4)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.4.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/nachogonzalezbullon/miniconda3/envs/llms/lib/python3.11/site-packages (from requests->youtube_transcript_api) (2025.7.9)\n"
]
}
],
"source": [
"!pip install youtube_transcript_api"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "717fc2a4-b6c5-4027-9e6b-05e83c38d02f",
"metadata": {},
"outputs": [],
"source": [
"from youtube_transcript_api import YouTubeTranscriptApi"
]
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": 4,
"source": [
"# Load environment variables in a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')"
],
"id": "3caca469-5f39-4592-bf12-c8832c44de19"
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"class YouTubeRecipeExtractor:\n",
"\n",
" def __init__(self):\n",
" self.openai = OpenAI()\n",
" self.system_prompt = self.get_system_prompt()\n",
"\n",
" def get_system_prompt(self):\n",
" return \"\"\"\n",
" You are a professional chef and nutritionist specializing in recipe writting.\n",
"\n",
" Your task is to write recipes in a very comprehensive and consistent manner.\n",
" Each recipe will contain a list of ingredients and a list of steps to follow.\n",
" The quantities of the ingredients should always be referred to an official unit (grams, litres, etc). If the original recipe uses a different unit (such as cup, teaspoons, etc.) make the transformation but keep the original instruction between parenthesis.\n",
" The steps should be described in a very synthetic and concise manner. You should avoid being verbose, but the step should be understandable and easy to follow for non-expert people.\n",
" To each recipe add a general analysis from nutrition perspective (number of calories per serving, proteins, fat, etc.).\n",
" Use Markdown to improve readability.\n",
" If the text you receive is not a recipe, return a kind message explaining the situation.\n",
" \"\"\"\n",
"\n",
"    def extract_video_id(self, url):\n",
"        \"\"\"Return the video id embedded in a YouTube URL, or None when no id is found.\n",
"\n",
"        Recognizes the watch, youtu.be and embed URL shapes.\n",
"        \"\"\"\n",
"        video_id_pattern = r'(?:youtube\\.com/watch\\?v=|youtu\\.be/|youtube\\.com/embed/)([^&\\n?#]+)'\n",
"        found = re.search(video_id_pattern, url)\n",
"        if found is None:\n",
"            return None\n",
"        return found.group(1)\n",
"\n",
"    def get_transcription(self, video_id):\n",
"        \"\"\"Fetch the spoken-word transcript for a video; return it as one string, or None on failure.\"\"\"\n",
"        try:\n",
"            print(f\"Fetching video transcript for video {video_id}...\")\n",
"            # youtube_transcript_api >= 1.0 (1.1.1 is installed above) removed the\n",
"            # static get_transcript(); the API must be instantiated and fetch() called.\n",
"            # The old call failed with \"fetch() missing 1 required positional argument: 'self'\".\n",
"            transcript = YouTubeTranscriptApi().fetch(video_id)\n",
"            # fetch() yields snippet objects with a .text attribute (not dicts).\n",
"            return \" \".join(snippet.text for snippet in transcript)\n",
"        except Exception as e:\n",
"            # Best-effort: report the problem and let the caller handle the None.\n",
"            print(f\"Error fetching transcript: {e}\")\n",
"            return None\n",
"\n",
"    def format_recipe(self, transcript):\n",
"        \"\"\"Ask the chat model to rewrite a raw transcript as a structured recipe.\n",
"\n",
"        Returns the model's Markdown-formatted recipe text (per the system\n",
"        prompt's rules), or None if the API call fails for any reason —\n",
"        the error is printed rather than raised.\n",
"        \"\"\"\n",
"        try:\n",
"            response = self.openai.chat.completions.create(\n",
"                model=\"gpt-4o-mini\",\n",
"                messages=[\n",
"                    {\"role\": \"system\", \"content\": self.system_prompt},\n",
"                    {\"role\": \"user\", \"content\": f\"Summarize the following YouTube recipe:\\n\\n{transcript}\"}\n",
"                ]\n",
"            )\n",
"            return response.choices[0].message.content\n",
"        except Exception as e:\n",
"            # Best-effort: report and return None instead of aborting the notebook run.\n",
"            print(f\"Error summarizing text: {e}\")\n",
"            return None\n",
"\n",
"    def display_recipe(self, url):\n",
"        \"\"\"End-to-end helper: extract the video id, fetch the transcript, and render the recipe as Markdown.\"\"\"\n",
"        video_id = self.extract_video_id(url)\n",
"        if video_id is None:\n",
"            print(\"Could not extract a video id from the URL\")\n",
"            return\n",
"        transcript = self.get_transcription(video_id)\n",
"        # Originally a None transcript was passed straight to the model, which\n",
"        # then 'summarized' the literal None (see the outputs below); stop here instead.\n",
"        if transcript is None:\n",
"            return\n",
"        recipe = self.format_recipe(transcript)\n",
"        if recipe is None:\n",
"            return\n",
"        display(Markdown(recipe))\n"
],
"id": "29e44cb5-0928-4ac9-9681-efd6ba1e359f"
},
{
"cell_type": "code",
"execution_count": 6,
"id": "98ea2d01-f949-4e03-9154-fe524cf64ca4",
"metadata": {},
"outputs": [],
"source": [
"test_bad_url = \"https://www.youtube.com/watch?v=hzGiTUTi060\"\n",
"test_good_url = \"https://www.youtube.com/watch?v=D_2DBLAt57c\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "362e39e8-a254-4f2f-8653-5fbb7ff0e1e9",
"metadata": {},
"outputs": [],
"source": [
"extractor = YouTubeRecipeExtractor()\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0cc259bd-46bb-4472-b3cb-f39da54e324a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching video transcript...\n"
]
},
{
"data": {
"text/markdown": [
"Thank you for your interest, but the text you provided is not a recipe. If you're looking for cooking instructions, ingredient lists, or nutrition analysis, please provide a specific food or dish you would like to know about, and I'd be happy to help!"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"extractor.display_recipe(test_bad_url)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3f43e320-ca55-4db5-bc95-71fcb342cf3c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching video transcript for video D_2DBLAt57c...\n",
"Error fetching transcript: YouTubeTranscriptApi.fetch() missing 1 required positional argument: 'self'\n"
]
},
{
"data": {
"text/markdown": [
"It seems like you haven't provided a recipe or any details to summarize. If you have a specific recipe in mind, please share it, and I'll be happy to help!"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"extractor.display_recipe(test_good_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11c5c2b3-498a-43eb-9b68-d2b920c56b10",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,340 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ddc7185d",
"metadata": {},
"source": [
"# Resume-to-Job Gap Analysis Tool"
]
},
{
"cell_type": "markdown",
"id": "fe7462c2",
"metadata": {},
"source": [
"### **Project Summary**\n",
"This project demonstrates the use of a Large Language Model (LLM) to perform a sophisticated analysis task with real-world business value. The tool automates the tedious process of manually comparing a candidate's resume against a job description. By providing a job description URL and a candidate's resume text, this notebook generates a detailed cover letter and \"gap analysis\" report. This report highlights which skills are matched, which are missing, and provides an overall suitability score, enabling recruiters to screen candidates more efficiently and helping applicants tailor their resumes effectively.\n",
"\n",
"### **How to Use**\n",
"1. **Set up your Environment**: Make sure you have a `.env` file in the root directory with your `OPENAI_API_KEY`.\n",
"2. **Input the Job URL**: In **Section 2**, paste the URL of a web-based job description into the `job_description_url` variable.\n",
"3. **Input the Resume**: In **Section 2**, paste the candidate's full resume text into the `resume_text` variable.\n",
"4. **Run the Notebook**: Execute the cells from top to bottom. The final cell in **Section 6** will display the formatted analysis report.\n",
"\n",
"### **A Note on Ethical Web Scraping**\n",
"This tool uses the `requests` library to fetch website content. To ensure compliance and responsible use:\n",
"* We send a standard `User-Agent` header to identify our script as a web browser, which is a common practice to avoid being blocked.\n",
"* **Always be mindful of the website's terms of service.** Automated scraping may be disallowed on some sites. This tool is intended for educational purposes and should be used on publicly accessible job postings where such activity is permitted."
]
},
{
"cell_type": "markdown",
"id": "1a01b5d2",
"metadata": {},
"source": [
"## 1. Setup:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caca8d9a",
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2db03e8",
"metadata": {},
"outputs": [],
"source": [
"# Load Environment Variables\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')"
]
},
{
"cell_type": "markdown",
"id": "7c702fcc",
"metadata": {},
"source": [
"#### Test OpenAI API Key"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5347ee38",
"metadata": {},
"outputs": [],
"source": [
"# Validate API key\n",
"if not api_key:\n",
" print(\"ERROR: No API key found - please add OPENAI_API_KEY to your .env file\")\n",
"elif not api_key.startswith(\"sk-proj-\"):\n",
" print(\"WARNING: API key format may be incorrect\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"ERROR: API key has whitespace - please remove extra spaces/tabs\")\n",
"else:\n",
" print(\"SUCCESS: API key loaded successfully\")\n",
"\n",
"# Initialize OpenAI client\n",
"openai = OpenAI()"
]
},
{
"cell_type": "markdown",
"id": "dce21512",
"metadata": {},
"source": [
"## 2. Data Input"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5d90d56",
"metadata": {},
"outputs": [],
"source": [
"# The URL for the Y Combinator job posting you want to analyze. (ycombinator.com/companies/y-combinator/jobs//jobs)\n",
"job_url = \"https://www.ycombinator.com/companies/y-combinator/jobs/rq3DaTs-product-engineer\"\n",
"\n",
"# Replace this example resume with the actual candidate's resume text.\n",
"resume_text = \"\"\"\n",
"John Doe\n",
"123 Main Street, Anytown, USA | (123) 456-7890 | john.doe@email.com\n",
"\n",
"Summary\n",
"Software Engineer with 5 years of experience in web applications. \n",
"Proficient in Python and JavaScript with a strong background in AWS.\n",
"\n",
"Experience\n",
"Senior Software Engineer | Tech Solutions Inc. | 2021 - Present\n",
"- Led development of analytics dashboard using React and Python\n",
"- Architected microservices backend on AWS\n",
"- Mentored junior engineers\n",
"\n",
"Software Engineer | Innovate Corp. | 2018 - 2021\n",
"- Developed e-commerce platform using Python and Django\n",
"- Wrote comprehensive unit and integration tests\n",
"\n",
"Skills\n",
"Python, JavaScript, React, Flask, Django, AWS, Docker, Git\n",
"\"\"\"\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "a3d5e484",
"metadata": {},
"source": [
"## 3. Prompt Engineering"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b2b3d1b",
"metadata": {},
"outputs": [],
"source": [
"SYSTEM_PROMPT = \"\"\"\n",
"You are a strategic career advisor. Your task is to synthesize a candidate's resume and a job description into a compelling, two-part analysis. Your goal is to create a narrative connecting the candidate's specific accomplishments to the company's needs.\n",
"\n",
"**Formatting:** Use markdown with bolding for emphasis. Do not use placeholders like '[Job Title]'; infer the details from the text.\n",
"\n",
"---\n",
"\n",
"# Part 1: Candidate Suitability Analysis\n",
"\n",
"## Executive Summary\n",
"Provide a 2-3 sentence summary of the candidate's alignment with the role, stating your professional opinion on their potential.\n",
"\n",
"## Key Strengths & Evidence\n",
"List the top 3 strengths the candidate brings. For each strength, **quote or paraphrase evidence directly from the resume's 'Experience' section**.\n",
"* **Strength:** [Example: Scalable Backend Development] - **Evidence:** [Example: \"Architected microservices backend on AWS,\" demonstrating hands-on experience.]\n",
"\n",
"## Areas for Growth & Discussion\n",
"Identify key requirements from the job description not explicitly covered in the resume. Frame these as **strategic points to address in an interview**.\n",
"* **Topic:** [Example: TypeScript Proficiency] - **Suggested Question:** \"The role heavily uses TypeScript. Could you discuss your experience level with it and your approach to learning new languages?\"\n",
"\n",
"## Holistic Suitability Score\n",
"Provide a score (e.g., 85/100) and justify it in one sentence.\n",
"\n",
"---\n",
"\n",
"# Part 2: Dynamic Cover Letter Draft\n",
"Generate a compelling and authentic cover letter from the candidate's perspective.\n",
"\"\"\""
]
},
{
"cell_type": "markdown",
"id": "5146a406",
"metadata": {},
"source": [
"## 4. Webscraper"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d23965d",
"metadata": {},
"outputs": [],
"source": [
"# Scraper Function\n",
"def scrape_ycombinator_job(url: str) -> str:\n",
" \"\"\"\n",
" Scrapes a single job posting from a ycombinator.com URL.\n",
"\n",
" Args:\n",
" url: The URL of the Y Combinator job posting.\n",
"\n",
" Returns:\n",
" The cleaned text of the job description, or an error message.\n",
" \"\"\"\n",
" print(f\"INFO: Attempting to scrape YC job posting from: {url}\")\n",
" \n",
" headers = {\n",
" 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'\n",
" }\n",
" \n",
" try:\n",
" # Fetch the page content\n",
" response = requests.get(url, headers=headers, timeout=10)\n",
" # Raise an error if the page is not found (e.g., 404)\n",
" response.raise_for_status()\n",
"\n",
" # Parse the HTML with BeautifulSoup\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
"\n",
" # Extract the job title (specifically from the <h1> tag)\n",
" title_element = soup.select_one('h1')\n",
" title = title_element.get_text(strip=True) if title_element else \"Job Title Not Found\"\n",
"\n",
" # Extract the main job description content (from the <div class=\"prose\">)\n",
" description_element = soup.select_one('.prose')\n",
" description = description_element.get_text(separator='\\n', strip=True) if description_element else \"\"\n",
" \n",
" # Combine them for the final text\n",
" full_text = f\"Job Title: {title}\\n\\n{description}\"\n",
" \n",
" print(\"SUCCESS: Scraping complete.\")\n",
" return full_text\n",
"\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"ERROR: Scraping failed. Could not fetch URL. {e}\")\n",
" return \"[Scraping failed: Could not connect to the server]\"\n",
" except Exception as e:\n",
" print(f\"ERROR: An unexpected error occurred during scraping: {e}\")\n",
" return \"[Scraping failed: An unexpected error occurred]\"\n"
]
},
{
"cell_type": "markdown",
"id": "e159596d",
"metadata": {},
"source": [
"## 5. Gap Analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0dc8f72",
"metadata": {},
"outputs": [],
"source": [
"def get_analysis(job_description: str, resume: str) -> str:\n",
"    \"\"\"Sends the job description and resume to the AI and returns the analysis.\n",
"\n",
"    Args:\n",
"        job_description: Plain text of the job posting (scraped upstream).\n",
"        resume: Plain text of the candidate's resume.\n",
"\n",
"    Returns:\n",
"        The model's Markdown report as specified by SYSTEM_PROMPT: a\n",
"        suitability analysis (Part 1) plus a cover letter draft (Part 2).\n",
"    \"\"\"\n",
"    print(\"INFO: Sending data to the AI for analysis...\")\n",
"    user_prompt = f\"\"\"Please generate the analysis based on the following documents.\n",
"\n",
"    **JOB DESCRIPTION:**\n",
"    ---\n",
"    {job_description}\n",
"    ---\n",
"\n",
"    **CANDIDATE RESUME:**\n",
"    ---\n",
"    {resume}\n",
"    ---\n",
"    \"\"\"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
"        {\"role\": \"user\", \"content\": user_prompt}\n",
"    ]\n",
"    response = openai.chat.completions.create(\n",
"        model=\"gpt-4o-mini\",\n",
"        messages=messages\n",
"    )\n",
"    print(\"SUCCESS: Analysis complete.\")\n",
"    return response.choices[0].message.content"
]
},
{
"cell_type": "markdown",
"id": "f1deb906",
"metadata": {},
"source": [
"## 6. Execution"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3e57129",
"metadata": {},
"outputs": [],
"source": [
"# Scrape the job description text from the URL\n",
"job_description_text = scrape_ycombinator_job(job_url)\n",
"\n",
"# Only proceed if scraping was successful\n",
"if not job_description_text.startswith(\"[Scraping failed\"):\n",
" # Run the analysis with the scraped text\n",
" analysis_report = get_analysis(job_description_text, resume_text)\n",
" # Display the final report\n",
" display(Markdown(analysis_report))\n",
"else:\n",
" # If scraping failed, display the error message\n",
" display(Markdown(f\"## {job_description_text}\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llms",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,123 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3ba06289-d17a-4ccd-85f5-2b79956d4e59",
"metadata": {},
"outputs": [],
"source": [
"!pip install selenium"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "935fe7b1-1807-4f75-863d-4c118e425a19",
"metadata": {},
"outputs": [],
"source": [
"pip show selenium"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eabbbc62-1de1-4883-9b3e-9c90145ea6c5",
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.edge.options import Options as EdgeOptions # Import EdgeOptions\n",
"from selenium.webdriver.edge.service import Service as EdgeService # Import EdgeService\n",
"from bs4 import BeautifulSoup\n",
"import time\n",
"import os\n",
"\n",
"class Website:\n",
" def __init__(self, url, driver_path=None, wait_time=3):\n",
" self.url = url\n",
" self.wait_time = wait_time\n",
"\n",
" # Headless Edge settings\n",
" options = EdgeOptions() # Use EdgeOptions\n",
" # options.add_argument(\"--headless\")\n",
" options.add_argument(\"--disable-gpu\")\n",
" options.add_argument(\"--no-sandbox\")\n",
" options.add_argument(\"--window-size=1920x1080\")\n",
"\n",
" # Driver path\n",
" if driver_path:\n",
" # For Edge, you might need to specify the path to msedgedriver\n",
" # For driver download, https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH#downloads\n",
" service = EdgeService(executable_path=driver_path) # Use EdgeService\n",
" else:\n",
" # If msedgedriver.exe is in your system's PATH, you can omit executable_path\n",
" service = EdgeService()\n",
"\n",
" # Start browser\n",
" # Use webdriver.Edge() for Microsoft Edge\n",
" driver = webdriver.Edge(service=service, options=options)\n",
" driver.get(url)\n",
"\n",
" # Wait for the loading page\n",
" time.sleep(self.wait_time)\n",
"\n",
" # Take page source\n",
" html = driver.page_source\n",
" driver.quit()\n",
"\n",
" # Analysis with BeautifulSoup \n",
" soup = BeautifulSoup(html, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
"\n",
" # Clean irrelevant tags\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
"\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "852c52e2-bd4d-4bb9-94ef-e498c33f1a89",
"metadata": {},
"outputs": [],
"source": [
"site = Website(\"https://openai.com\", driver_path=\"/Users/klee/Documents/edgedriver_mac64_m1/msedgedriver\")\n",
"print(\"Title:\", site.title)\n",
"print(\"\\nFirst 500 character:\\n\", site.text[:500])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7620c685-c35c-4d6b-aaf1-a3da98f19ca7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3bf6bba3-cea5-4e28-8e57-bddef9c80013",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"\n",
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62f20a01-2d4f-45ac-a890-fce46d552301",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if not api_key:\n",
" print('No Api Key was found')\n",
"elif not api_key.startswith('sk-proj-'):\n",
" print(\"An api key was found, but it doesnt start with sk-proj\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An api key was found, but it might have space in the first or end\")\n",
"else:\n",
" print(\"Api key found and looks good so far!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9a1fea0-f228-4310-8c0c-2074cd09ab53",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9db0ddcd-befd-445b-817b-d30c50de9206",
"metadata": {},
"outputs": [],
"source": [
"message = \"Hello GPT, this is my first message\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\": \"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
"system_prompt = \"You are a personal tennis coach who helps children learn how to play. \\\n",
"Write a short summary of advice for a child who is just starting to get interested in tennis. \\\n",
"Respond in Markdown\"\n",
"user_prompt = \"\"\"\n",
" Even if youve both geared up and warmed up, you still need to know all the tennis basics before you step onto the court and play. You can use any combination of tennis grips (like the Semi-Western or Continental grip) and moves like drop shots, lobs, backhand volleys, or forehand strokes to try and win each point. However, learning all the fundamentals of tennis is imperative to play your best tennis:\n",
"Keep it inside the lines. For singles tennis, the serve must always land over the net, and within the opponents opposite service box (the box on either side of the center mark at the service line, also known as the “T”). If the ball hits the net and still lands in the proper service box, its called a “let,” and the server gets to start over from the first serve again. Even if the ball technically lands outside the box, as long as any part of it still touches the line, it is still in-play. During a rally, the ball must stay within the singles court boundaries, which are the inner sidelines. For doubles tennis, the outer alleys are in-play. However, most beginner players wont have a line judge present, so they must call the ball out or raise their finger if the ball lands outside the lines.\n",
"Keep score. Tennis has a unique scoring system, and its important to keep track of your points to determine who will win (and which side you should be serving from). The server always says their score first, even if it is lower than their opponents. For example, if the server loses the first three points in a row, the score is love-40.\n",
"Avoid touching the net. You can rush the net and perform any volleying maneuver you like. However, if any part of you or your racket physically touches the net at any time during a point, you automatically lose. The net is the equal divider between both sides, and any alteration to its positioning, even accidental, is not allowed.\n",
"Hold onto your racket. Your racket must stay in your hands at all times. If you drop or throw the racket at the ball, you will lose the point. You can only return the ball with your racket and no other part of your body. However, the ball doesnt necessarily have to touch the racket face—its still in-play even if it hits the handle or triangle as well.\n",
"Hit the ball after one bounce. Once the ball bounces twice, the point is over. Similarly, you can only hit the ball once as well. Even if you clip the ball and it lands in front of you again, the point is over if the ball doesnt reach your opponents side.\n",
"A ball in the air is a ball in play. Even if your opponent is well behind the baseline in “out” territory, if they make contact with the ball or it hits a part of their body before the bounce, its still in-play. A ball cant be called until it bounces.\n",
"Win by two. Both games and points must be won by two in a tennis match. In the event of a tie, where both players each win six games in a set resulting in a score of 6-6, a tiebreak is introduced. This is where players must face off in a seven-point mini-match. The players switch sides after each serve point, and the end of the court when the sum of the points equals six or multiples thereof. The first player to reach seven points (leading by two) wins. If the tiebreaker occurs in the last set, the points are instead played first to 10, and the winning player must still win by two points.\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
"] # fill this in\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response =openai.chat.completions.create(\n",
" model=\"gpt-4o-mini\",\n",
" messages=messages\n",
")\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,561 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Welcome to your first assignment!\n",
"\n",
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
]
},
{
"cell_type": "markdown",
"id": "ada885d9-4d42-4d9b-97f0-74fbbbfe93a9",
"metadata": {},
"source": [
"<table style=\"margin: 0; text-align: left;\">\n",
" <tr>\n",
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
" </td>\n",
" <td>\n",
" <h2 style=\"color:#f71;\">Just before we get to the assignment --</h2>\n",
" <span style=\"color:#f71;\">I thought I'd take a second to point you at this page of useful resources for the course. This includes links to all the slides.<br/>\n",
" <a href=\"https://edwarddonner.com/2024/11/13/llm-engineering-resources/\">https://edwarddonner.com/2024/11/13/llm-engineering-resources/</a><br/>\n",
" Please keep this bookmarked, and I'll continue to add more useful links there over time.\n",
" </span>\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "markdown",
"id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
"metadata": {},
"source": [
"# HOMEWORK EXERCISE ASSIGNMENT\n",
"\n",
"Upgrade the day 1 project to summarize a webpage to use an Open Source model running locally via Ollama rather than OpenAI\n",
"\n",
"You'll be able to use this technique for all subsequent projects if you'd prefer not to use paid APIs.\n",
"\n",
"**Benefits:**\n",
"1. No API charges - open-source\n",
"2. Data doesn't leave your box\n",
"\n",
"**Disadvantages:**\n",
"1. Significantly less power than Frontier Model\n",
"\n",
"## Recap on installation of Ollama\n",
"\n",
"Simply visit [ollama.com](https://ollama.com) and install!\n",
"\n",
"Once complete, the ollama server should already be running locally. \n",
"If you visit: \n",
"[http://localhost:11434/](http://localhost:11434/)\n",
"\n",
"You should see the message `Ollama is running`. \n",
"\n",
"If not, bring up a new Terminal (Mac) or Powershell (Windows) and enter `ollama serve` \n",
"And in another Terminal (Mac) or Powershell (Windows), enter `ollama pull llama3.2` \n",
"Then try [http://localhost:11434/](http://localhost:11434/) again.\n",
"\n",
"If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative. Run `ollama pull llama3.2:1b` from a Terminal or Powershell, and change the code below from `MODEL = \"llama3.2\"` to `MODEL = \"llama3.2:1b\"`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
"metadata": {},
"outputs": [],
"source": [
"# Create a messages list using the same format that we used for OpenAI\n",
"\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
"metadata": {},
"outputs": [],
"source": [
"# Let's just make sure the model is loaded\n",
"\n",
"!ollama pull llama3.2"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generative AI has numerous business applications across various industries. Here are some examples:\n",
"\n",
"1. **Content Generation**: Generative AI can be used to generate high-quality content such as articles, social media posts, product descriptions, and more. This can help businesses save time and resources while maintaining consistency in their content.\n",
"2. **Product Design and Development**: Generative AI can be used to design and develop new products, such as furniture, electronics, and other consumer goods. It can also be used to optimize existing product designs for better performance and efficiency.\n",
"3. **Marketing Automation**: Generative AI can be used to automate marketing tasks such as email campaigns, ad copywriting, and social media posting. This can help businesses personalize their marketing efforts and reach a wider audience.\n",
"4. **Customer Service**: Generative AI can be used to power chatbots and virtual assistants that provide customer support and answer frequently asked questions. This can help businesses improve their customer service experience and reduce response times.\n",
"5. **Data Analysis and Visualization**: Generative AI can be used to analyze large datasets and generate insights, such as identifying trends and patterns. It can also be used to visualize complex data in a more intuitive and user-friendly way.\n",
"6. **Predictive Maintenance**: Generative AI can be used to predict equipment failures and schedule maintenance tasks. This can help businesses reduce downtime and improve overall efficiency.\n",
"7. **Supply Chain Optimization**: Generative AI can be used to optimize supply chain operations, such as predicting demand, managing inventory, and identifying bottlenecks.\n",
"8. **Financial Modeling**: Generative AI can be used to build financial models and predict future market trends. This can help businesses make more informed investment decisions and avoid potential pitfalls.\n",
"9. **Creative Writing and Art**: Generative AI can be used to generate creative content such as poetry, short stories, and art. This can help businesses tap into new sources of inspiration and innovation.\n",
"10. **Cybersecurity**: Generative AI can be used to detect and respond to cyber threats in real-time. It can also be used to predict potential vulnerabilities and develop more effective security strategies.\n",
"\n",
"Some specific examples of companies using generative AI include:\n",
"\n",
"* Google's AutoML (Automated Machine Learning) tool, which uses generative AI to automate machine learning tasks.\n",
"* Amazon's SageMaker, which provides a range of tools and services for building and deploying generative AI models.\n",
"* Microsoft's Azure Machine Learning, which offers a range of features and tools for building and deploying generative AI models.\n",
"* IBM's Watson, which uses generative AI to provide a range of services including customer service, content generation, and predictive maintenance.\n",
"\n",
"These are just a few examples of the many business applications of generative AI. As the technology continues to evolve, we can expect to see even more innovative use cases emerge.\n"
]
}
],
"source": [
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
"# And if none of that works - contact me!\n",
"\n",
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
"metadata": {},
"source": [
"# Introducing the ollama package\n",
"\n",
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
"\n",
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generative AI has numerous business applications across various industries. Here are some examples:\n",
"\n",
"1. **Content Creation**: Generative AI can be used to generate high-quality content such as articles, social media posts, and product descriptions. This can help businesses reduce content creation costs and improve efficiency.\n",
"2. **Marketing Automation**: Generative AI-powered chatbots can be used to automate customer service, provide personalized recommendations, and enhance the overall customer experience.\n",
"3. **Product Design**: Generative AI can be used to generate design concepts for products, packaging, and branding. This can help businesses reduce design costs and improve product appeal.\n",
"4. **Recommendation Systems**: Generative AI can be used to build recommendation systems that suggest products or services based on user behavior, preferences, and search history.\n",
"5. **Financial Analysis**: Generative AI can be used to analyze financial data, predict market trends, and identify potential investment opportunities.\n",
"6. **Supply Chain Optimization**: Generative AI can be used to optimize supply chain operations by predicting demand, identifying bottlenecks, and suggesting alternative routes.\n",
"7. **Customer Service**: Generative AI-powered chatbots can be used to provide 24/7 customer support, answer frequently asked questions, and route complex issues to human agents.\n",
"8. **Sales Forecasting**: Generative AI can be used to predict sales performance based on historical data, market trends, and competitor activity.\n",
"9. **Brand Identity**: Generative AI can be used to generate brand identities, logos, and visual styles that are consistent with a company's values and mission.\n",
"10. **Quality Control**: Generative AI can be used to detect defects in products, analyze quality control metrics, and suggest improvements.\n",
"\n",
"Some specific examples of businesses using generative AI include:\n",
"\n",
"* Amazon using generative AI to optimize its recommendation system\n",
"* IBM using generative AI to generate new designs for products and packaging\n",
"* NVIDIA using generative AI to develop more realistic graphics and animations for gaming and movie production\n",
"* Siemens using generative AI to optimize supply chain operations and reduce costs\n",
"\n",
"Overall, generative AI has the potential to transform businesses by automating tasks, improving efficiency, and providing new insights into customer behavior and market trends.\n"
]
}
],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
"metadata": {},
"source": [
"## Alternative approach - using OpenAI python library to connect to Ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
"metadata": {},
"outputs": [],
"source": [
"# There's actually an alternative approach that some people might prefer\n",
"# You can use the OpenAI client python library to call Ollama:\n",
"\n",
"from openai import OpenAI\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "9f9e22da-b891-41f6-9ac9-bd0c0a5f4f44",
"metadata": {},
"source": [
"## Are you confused about why that works?\n",
"\n",
"It seems strange, right? We just used OpenAI code to call Ollama?? What's going on?!\n",
"\n",
"Here's the scoop:\n",
"\n",
"The python class `OpenAI` is simply code written by OpenAI engineers that makes calls over the internet to an endpoint. \n",
"\n",
"When you call `openai.chat.completions.create()`, this python code just makes a web request to the following url: \"https://api.openai.com/v1/chat/completions\"\n",
"\n",
"Code like this is known as a \"client library\" - it's just wrapper code that runs on your machine to make web requests. The actual power of GPT is running on OpenAI's cloud behind this API, not on your computer!\n",
"\n",
"OpenAI was so popular, that lots of other AI providers provided identical web endpoints, so you could use the same approach.\n",
"\n",
"So Ollama has an endpoint running on your local box at http://localhost:11434/v1/chat/completions \n",
"And in week 2 we'll discover that lots of other providers do this too, including Gemini and DeepSeek.\n",
"\n",
"And then the team at OpenAI had a great idea: they can extend their client library so you can specify a different 'base url', and use their library to call any compatible API.\n",
"\n",
"That's it!\n",
"\n",
"So when you say: `ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')` \n",
"Then this will make the same endpoint calls, but to Ollama instead of OpenAI."
]
},
{
"cell_type": "markdown",
"id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90",
"metadata": {},
"source": [
"## Also trying the amazing reasoning model DeepSeek\n",
"\n",
"Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. \n",
"This is actually a 1.5B variant of Qwen that has been fine-tuned using synthetic data generated by DeepSeek R1.\n",
"\n",
"Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d",
"metadata": {},
"outputs": [],
"source": [
"!ollama pull deepseek-r1:1.5b"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d3d554b-e00d-4c08-9300-45e073950a76",
"metadata": {},
"outputs": [],
"source": [
"# This may take a few minutes to run! You should then see a fascinating \"thinking\" trace inside <think> tags, followed by some decent definitions\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=\"deepseek-r1:1.5b\",\n",
" messages=[{\"role\": \"user\", \"content\": \"Please give definitions of some core concepts behind LLMs: a neural network, attention and the transformer\"}]\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
"metadata": {},
"source": [
"# NOW the exercise for you\n",
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"    \"\"\"A scraped webpage: holds the url, page title and visible body text.\"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        # Guard against responses with no <body> (error pages, redirects),\n",
"        # which would otherwise raise AttributeError on soup.body[...] below.\n",
"        if soup.body:\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d29564f8-13f6-48b6-ab0b-450e53f3e3aa",
"metadata": {},
"outputs": [],
"source": [
"ed = Website(\"https://edwarddonner.com\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "721a4ec9-0b66-419d-92e1-8b24e9a38b39",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e0ae9815-4643-4fc7-88d0-72db83fa569f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You are looking at a website titled Home - Edward Donner\n",
"The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n",
"\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Well, hi there.\n",
"Im Ed. I like writing code and experimenting with LLMs, and hopefully youre here because you do too. I also enjoy DJing (but Im badly out of practice), amateur electronic music production (\n",
"very\n",
"amateur) and losing myself in\n",
"Hacker News\n",
", nodding my head sagely to things I only half understand.\n",
"Im the co-founder and CTO of\n",
"Nebula.io\n",
". Were applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. Im previously the founder and CEO of AI startup untapt,\n",
"acquired in 2021\n",
".\n",
"We work with groundbreaking, proprietary LLMs verticalized for talent, weve\n",
"patented\n",
"our matching model, and our award-winning platform has happy customers and tons of press coverage.\n",
"Connect\n",
"with me for more!\n",
"May 28, 2025\n",
"Connecting my courses become an LLM expert and leader\n",
"May 18, 2025\n",
"2025 AI Executive Briefing\n",
"April 21, 2025\n",
"The Complete Agentic AI Engineering Course\n",
"January 23, 2025\n",
"LLM Workshop Hands-on with Agents resources\n",
"Navigation\n",
"Home\n",
"Connect Four\n",
"Outsmart\n",
"An arena that pits LLMs against each other in a battle of diplomacy and deviousness\n",
"About\n",
"Posts\n",
"Get in touch\n",
"ed [at] edwarddonner [dot] com\n",
"www.edwarddonner.com\n",
"Follow me\n",
"LinkedIn\n",
"Twitter\n",
"Facebook\n",
"Subscribe to newsletter\n",
"Type your email…\n",
"Subscribe\n"
]
}
],
"source": [
"print(user_prompt_for(ed))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cf718345-9084-4a16-ae1c-6099b4c82d89",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ac40aa1a-3121-471d-bd9a-12eab4daa063",
"metadata": {},
"outputs": [],
"source": [
"payloadExercise = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages_for(ed),\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6d79ad65-37de-413e-bd2e-4e99aad46d5b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"**Summary of the Website**\n",
"==========================\n",
"\n",
"### About the Owner\n",
"\n",
"The website is owned by Edward Donner, a co-founder and CTO of Nebula.io, an AI startup that applies machine learning (LLM) to help people discover their potential. He has previous experience as the founder and CEO of another AI startup, untapt, which was acquired in 2021.\n",
"\n",
"### Latest News/Announcements\n",
"\n",
"* **Courses:** Edward Donner is offering courses on LLM expert and leader development.\n",
" * January 23, 2025: The Complete Agentic AI Engineering Course\n",
" * May 28, 2025: Connecting my courses become an LLM expert and leader\n",
" * Other upcoming courses include \"LLM Workshop Hands-on with Agents resources\"\n",
"* **AI Executive Briefing:** A series of events for executive-level individuals.\n",
" * April 21, 2025: 2025 AI Executive Briefing\n"
]
}
],
"source": [
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
"# And if none of that works - contact me!\n",
"\n",
"responseExercise = requests.post(OLLAMA_API, json=payloadExercise, headers=HEADERS)\n",
"print(responseExercise.json()['message']['content'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b8420ce-1934-4dbf-8f46-d5accbce9560",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "663695bd-d1f2-4acf-8669-02d9f75f1bf4",
"metadata": {},
"source": [
"# Day 2: Ollama Solution for Website Summarization\n",
"### Building and Deploying Website Summarization Tools with Ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "112ef04a-136e-4e65-b94e-8674a64606ed",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\"\n",
"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"    \"\"\"A scraped webpage: holds the url, page title and visible body text.\"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        # Guard against responses with no <body> (error pages, redirects),\n",
"        # which would otherwise raise AttributeError on soup.body[...] below.\n",
"        if soup.body:\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\"\n",
"\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
"and provides a short summary, ignoring text that might be navigation related. \\\n",
"Respond in markdown.\"\n",
"\n",
"def user_prompt_for(website):\n",
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
"please provide a short summary of this website in markdown. \\\n",
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
" user_prompt += website.text\n",
" return user_prompt\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]\n",
"\n",
"url = \"https://sitemakerlab.com/\" \n",
"site = Website(url)\n",
"messages = messages_for(site)\n",
"\n",
"def summarize(url):\n",
"    \"\"\"Fetch `url`, build the chat messages and return Ollama's summary text.\"\"\"\n",
"    website = Website(url)\n",
"    # Bug fix: the original called `ollama_via_openai`, which is never defined\n",
"    # in this notebook (only requests/bs4/IPython are imported above).\n",
"    # Use the requests-based Ollama chat endpoint configured in this cell instead.\n",
"    payload = {\n",
"        \"model\": MODEL,\n",
"        \"messages\": messages_for(website),\n",
"        \"stream\": False\n",
"    }\n",
"    response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"    return response.json()['message']['content']\n",
"\n",
"def display_summary(url):\n",
" summary = summarize(url)\n",
" display(Markdown(summary))\n",
"\n",
"display_summary(\"https://edwarddonner.com\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,319 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# Welcome to your first assignment!\n",
"\n",
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
"metadata": {},
"outputs": [],
"source": [
"# Create a messages list using the same format that we used for OpenAI\n",
"\n",
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])"
]
},
{
"cell_type": "markdown",
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
"metadata": {},
"source": [
"## Alternative approach - using OpenAI python library to connect to Ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
"metadata": {},
"outputs": [],
"source": [
"# There's actually an alternative approach that some people might prefer\n",
"# You can use the OpenAI client python library to call Ollama:\n",
"\n",
"from openai import OpenAI\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n",
"response = ollama_via_openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=messages\n",
")\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
"metadata": {},
"source": [
"# NOW the exercise for you\n",
"\n",
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0c1f84c4-4cc0-4085-8ea5-871a8ca46a47",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "890852ab-2cd4-41dc-b168-6bd1360b967a",
"metadata": {},
"outputs": [],
"source": [
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"    \"\"\"A scraped webpage: holds the url, page title and visible body text.\"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        # Guard against responses with no <body> (error pages, redirects),\n",
"        # which would otherwise raise AttributeError on soup.body[...] below.\n",
"        if soup.body:\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "9d398f9a-c66e-42b5-91b4-5417944b8408",
"metadata": {},
"outputs": [],
"source": [
"def user_prompt_generator(website) -> str:\n",
"    \"\"\"Build the user prompt asking the model to review `website` for content and accessibility.\"\"\"\n",
"    user_prompt = f\"You will act as a website summarizer with knowledge of Web Content Accessibility Guidelines. You will look into the web: {website.title} and \"\n",
"    # Prompt grammar fixed (\"in this categories\" -> \"into these categories\",\n",
"    # \"practises should improve\" -> \"practices it should improve\") so the\n",
"    # instruction sent to the model is well-formed.\n",
"    user_prompt += \"break down the relevant information about it into these categories: What is the website about, \\\n",
"    to whom the website belongs and what practices it should improve to have a better user experience.\\n\\n\"\n",
"    user_prompt += website.text\n",
"\n",
"    return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "156d7c67-b714-4156-9f69-faf0c50aaf13",
"metadata": {},
"outputs": [],
"source": [
"def messages_generator(user_prompt : str) -> list[dict[str, str]]:\n",
" messages = [{\"role\" : \"user\", \"content\" : user_prompt}]\n",
"\n",
" return messages"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f07c4143-6cc5-4d28-846c-a373564e9264",
"metadata": {},
"outputs": [],
"source": [
"def user_request_reader() -> str:\n",
" while True:\n",
" website_url = input(\"Define what website you want to summarize by giving the url: \")\n",
" if website_url.lower().startswith(\"http\"):\n",
" return website_url\n",
" print(\"URL not valid. Please provide a full url.\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "94933255-2ca8-40b5-8f74-865d3e781058",
"metadata": {},
"outputs": [],
"source": [
"def summarizer_bot():\n",
" website_url = user_request_reader()\n",
" website = Website(website_url)\n",
" \n",
" user_prompt = user_prompt_generator(website)\n",
" messages = messages_generator(user_prompt)\n",
"\n",
" response = ollama.chat(model=MODEL, messages=messages)\n",
" print(response['message']['content'])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "2d81faa4-25b3-4d5d-8f36-93772e449b5c",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Define what website you want to summarize by giving the url: test.com\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"URL not valid. Please provide a full url.\n",
"\n"
]
},
{
"name": "stdin",
"output_type": "stream",
"text": [
"Define what website you want to summarize by giving the url: https://edwarddonner.com\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"**Summary:**\n",
"\n",
"The website \"Home - Edward Donner\" belongs to Edward Donner, a co-founder and CTO of Nebula.io, an AI startup. The website is about Edward's interests in writing code, experimenting with Large Language Models (LLMs), and DJing, as well as his work in applying AI to help people discover their potential.\n",
"\n",
"**Categories:**\n",
"\n",
"### What is the website about?\n",
"\n",
"The website is primarily about Edward Donner's personal brand, showcasing his expertise in AI and LLMs. It includes information about his work at Nebula.io, which applies AI to talent management. The website also features a \"Connect Four\" arena where LLMs compete against each other, as well as sections for learning more about LLMs and staying up-to-date with Edward's courses and publications.\n",
"\n",
"### To whom does the website belong?\n",
"\n",
"The website belongs to Edward Donner, a co-founder and CTO of Nebula.io. It appears to be a personal website or blog, showcasing his expertise and interests in AI and LLMs.\n",
"\n",
"### Practices to improve for better user experience:\n",
"\n",
"1. **Clearer navigation**: The website's menu is simple but not intuitive. Adding clear categories or sections would help users quickly find the information they're looking for.\n",
"2. **More detailed about section**: The \"About\" section provides a brief overview of Edward's work and interests, but it could be more detailed and comprehensive.\n",
"3. **Improved accessibility**: While the website is likely following general web accessibility guidelines, there are no clear indications of this on the page. Adding alt text to images, providing a clear font size and color scheme, and ensuring sufficient contrast between background and foreground would improve the user experience for people with disabilities.\n",
"4. **Better calls-to-action (CTAs)**: The website could benefit from more prominent CTAs, guiding users towards specific actions such as signing up for courses or following Edward on social media.\n",
"5. **SEO optimization**: The website's content and meta tags appear to be optimized for search engines, but a more thorough SEO analysis would help identify areas for improvement.\n",
"\n",
"Overall, the website provides a clear overview of Edward Donner's interests and expertise in AI and LLMs, but could benefit from some tweaks to improve accessibility, navigation, and CTAs.\n"
]
}
],
"source": [
"# The call\n",
"summarizer_bot()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "245fe5e2-9a3d-42f6-a39a-2a0f8750dd89",
"metadata": {},
"outputs": [],
"source": [
    "import requests\n",
    "\n",
    "OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4b598b5-2b8f-4004-88de-1fa03050a11f",
"metadata": {},
"outputs": [],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Write a short summary of advice for a child who is just starting to get interested in tennis.\"}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e0ffcc6-1489-41d9-9cd3-6656101bff2e",
"metadata": {},
"outputs": [],
"source": [
"payload = {\n",
" \"model\": MODEL,\n",
" \"messages\": messages,\n",
" \"stream\": False\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33290d04-b7f2-4f36-956b-170685faa78c",
"metadata": {},
"outputs": [],
"source": [
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0dfabb0-dd31-4508-8f72-34482e2bef4a",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"\n",
"response = ollama.chat(model=MODEL, messages=messages)\n",
"print(response['message']['content'])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,338 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "7759922b-12c9-44e0-8ac3-5f2a02b321d7",
"metadata": {},
"outputs": [],
"source": [
"import fitz # PyMuPDF\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a266273a-05e3-451e-a318-428726cfa39c",
"metadata": {},
"outputs": [],
"source": [
"# Initialize and constants\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45566572-dd66-48dc-ab7b-6adbe26eacba",
"metadata": {},
"outputs": [],
"source": [
"exceptions = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "effc0e7b-d668-48b3-86d0-dbb5d8fe3d55",
"metadata": {},
"outputs": [],
"source": [
"# Building system prompt\n",
"def get_verse_system_prompt():\n",
    "    system_prompt = \"You are a spiritual student who classifies the verses of the BhagavadGita according to a given theme.\\n\"\n",
    "    system_prompt += \"Given a theme, you should pick a verse from any chapter and give its location in the form of index chapter.verse_number (6.2)\\n\"\n",
" system_prompt += \"You should respond in JSON as in this example:\\n\"\n",
" system_prompt += \"\"\"\n",
" {\"title\": \"Chapter 3, Verse 21 (3.21)\", \"verse\": \"कर्मणा ह्यपि संसिद्धिम्‌\n",
" आस्थिता जनकादय:।\n",
" लोकसंग्रहमेवापि\n",
" सम्पश्यन्कर्तुमर्हसि॥\"}\n",
" \"\"\"\n",
" return system_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bbfb1035-b183-4481-9b49-3cc1b12b42e8",
"metadata": {},
"outputs": [],
"source": [
"print(get_verse_system_prompt())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6acdcd6c-1fc5-4c71-81d0-665e25808e46",
"metadata": {},
"outputs": [],
"source": [
"# Define user prompt\n",
"def get_verse_user_prompt(theme):\n",
" user_prompt = f'''\n",
" Here is the theme : {theme},\n",
" Please find a verse from BhagavadGita excluding {exceptions} for a given theme {theme}\n",
" '''#excluding those results which are already used\n",
" \n",
" user_prompt += \"If the verse is not in the exceptions for a given theme and used for a different theme, you are free to suggest it for a different theme.\"\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72f5c755-ec2d-4545-9a31-0f6b2e5ed4da",
"metadata": {},
"outputs": [],
"source": [
"print(get_verse_user_prompt('motivation'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "304d432c-7216-4a90-a5d8-db36b193657d",
"metadata": {},
"outputs": [],
"source": [
    "# Call OpenAI to return verses\n",
"def get_verses(theme):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": get_verse_system_prompt()},\n",
" {\"role\": \"user\", \"content\": get_verse_user_prompt(theme)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" result = json.loads(result)\n",
"\n",
" #Remember those results which are suggested now\n",
" combination = (theme, result['title'])\n",
" exceptions.append(combination)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b90eeb35-e10e-48ee-ade6-e0594da8c51b",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print(get_verses('motivation'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5b8925e-52e4-4cb7-9205-51c65ed88fb8",
"metadata": {},
"outputs": [],
"source": [
"# So far we have fetched the new verses relevant to a given theme \n",
    "# Let's generate a script for producing a YouTube video"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ff0862b-0310-4174-ad12-64047932dc9e",
"metadata": {},
"outputs": [],
"source": [
"#def function for system prompt\n",
"def get_script_system_prompt(tone, theme, format):\n",
" sys_prompt = 'You are a script writer for a youtube spiritual channel\\n'\n",
" sys_prompt += 'You are given a verse like below: \\n'\n",
" sys_prompt += str(get_verses(theme))\n",
" sys_prompt += '\\n'\n",
" sys_prompt += f'Give me an engaging script in a {tone} tone for a {format} format video for audience like youth seeking purpose, spiritual seekers, indians abroad, scholars and curious minds.'\n",
"\n",
" return sys_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47476516-cd2f-4b16-b378-a70617bbe284",
"metadata": {},
"outputs": [],
"source": [
"print(get_script_system_prompt('Motivating','motivation','long'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e305525b-8dde-4e93-927a-e24531827498",
"metadata": {},
"outputs": [],
"source": [
"# def function for user prompt\n",
"def get_script_user_prompt(format, theme):\n",
" user_prompt = f'Given the verse, help me generate a detailed script suitable for {format} format video.\\n'\n",
" user_prompt += f'Please give me the complete verse, its meaning, a relevant story having a dilemma which the verse solves and the interpretation of the verse with respect to {theme}.\\n'\n",
" user_prompt += 'Let the script give cues about video editing, host actions.'\n",
" user_prompt += 'given the below example, please follow the format:\\n'\n",
" user_prompt += \"\"\"\n",
" [Opening Scene - Soft Instrumental Music Playing]\n",
"\n",
" [Cut to Host in a serene setting, perhaps by a river or in a lush green garden.]\n",
"\n",
" Host: (Smiling at the camera) \"Namaste, dear viewers! Welcome back to our channel, where we explore the depths of spirituality and seek to ignite the flame of wisdom within you. Today, we delve into a profound verse from the Bhagavad Gita that speaks to the very essence of life and identity.\"\n",
"\n",
" [Text On Screen: Chapter 2, Verse 13 (2.13)]\n",
"\n",
" Host: (With a sense of reverence) \"Lets first take a moment to recite this verse together. It goes like this:\n",
"\n",
" देहिनोऽस्मिन्न्यथा देहे कौमारं यौवनं जरा।\n",
" तथादेहान्तरप्राप्तिर्धीरस्तत्र न मुह्यति॥\n",
"\n",
" Now, lets understand the essence of this verse.\"\n",
"\n",
" [Cut to Graphic: Verse Translation with Key Concepts Highlighted]\n",
"\n",
" Host Voiceover: (Calm and engaging tone) \"The meaning of this beautiful verse translates to: 'Just as the body undergoes changes from childhood to old age, similarly, the soul transitions from one body to another. The wise, who understand this, are never bewildered by these changes.'\n",
"\n",
" [Cut back to Host]\n",
"\n",
" Host: (Nodding, creating a connection)\n",
" \"So, why is this verse so important, especially for us as young seekers of purpose? It highlights a profound truth—that our identities are not confined by our physical forms or the stages of life we experience. Instead, we are eternal beings who are constantly evolving.\"\n",
"\n",
" [Scene Transition - Soft Music Playing]\n",
"\n",
" [Cut to a Story Animation - A young man named Arjun in a busy city]\n",
"\n",
    "    Host (Voiceover): \"Let me share a relatable story. Meet Arjun. Like many of us, he was once full of dreams and aspirations. He excelled in school, pursuing a career in engineering. But as the years passed, he faced a crossroads. As the pressure mounted, he began to question his identity.\n",
"\n",
" (Visuals show Arjun overwhelmed by societal expectations, with people pushing him in different directions.)\n",
"\n",
" He felt distinct phases of life pulling at him: childhood dreams, youthful ambitions, and the looming responsibilities of adulthood. The changing seasons of his life left him confused and wondering if he had lost his true self.\"\n",
"\n",
" [Cut back to Host, empathetic tone]\n",
"\n",
" Host: \"Have you ever felt like Arjun? Its a dilemma we all face, especially in today's fast-paced world where expectations can cloud our true identity. But just like our verse suggests, we should recognize that these changes dont define us. They are simply part of the journey.\"\n",
"\n",
" [Scene Transition - Calm Music Playing while Host meditates]\n",
"\n",
" Host: (Speaking gently) \"Lets take a moment to reflect. When we are sad, does that sadness define us? Or when we achieve success, do we become defined solely by that success? The answer isn't as straightforward as it seems. Heres the catch: our essence is beyond these transient states. Like the body, our identities are fluid.\"\n",
"\n",
" [Cut to Visuals of Nature - flowing rivers, trees shedding leaves, etc.]\n",
"\n",
" Host Voiceover: \"Imagine the endless cycle of nature—the changing seasons, the growth, the decay, and rebirth. Just like the leaves that drop to make way for new growth, our experiences contribute to our spiritual evolution.\"\n",
"\n",
" [Cut back to Host - Inviting and Warm Tone]\n",
"\n",
" Host: \"Just as the wise who understand the transformation of the soul remain unshaken, we, too, can cultivate that wisdom to rise above the chaos of change. Recognize your true essence—beyond the body, the roles, the titles. Understand that your spirit is eternal.\"\n",
"\n",
" [Scene Transition - Soft Inspirational Music Begins]\n",
"\n",
" Host: (Passionately) \"So how can we embody this truth in our daily lives? Heres a small exercise: Each day, take a few moments to meditate on who you really are. Write down what aspects of your identity are tied to transient things. Challenge yourself—what happens when you peel these layers away?\"\n",
"\n",
" [Cut to host with a pad, writing ideas]\n",
"\n",
" [Scene Transition - Editing Cues - Show engaging graphics of identity, layers of a person, etc.]\n",
"\n",
" Host Voiceover: \"Each effort towards understanding and embracing our true self draws us closer to the realization that we are eternal souls, having a human experience. This is the wisdom that can empower you to stand tall against the adversities of life.\"\n",
"\n",
" [Cut back to Host]\n",
"\n",
    "    Host: (Concluding) \"Thank you for joining me today in this exploration of Chapter 2, Verse 13 of the Bhagavad Gita. Remember, when you feel lost in the complexities of life, return to these teachings and remind yourself that you are not just a body; you are an eternal being on a magnificent journey.\n",
"\n",
" [Closing Scene - Uplifting Music Playing]\n",
"\n",
" Host: \"Dont forget to like, share, and subscribe if you found resonance in this message. And share your thoughts in the comments below. What did you find most challenging in your own journey of self-identity? Lets connect and support each other in our spiritual quests. Until next time, stay enlightened, stay inspired!\"\n",
"\n",
" [End Screen with Subscribe Button and Previous Video Suggestions]\n",
"\n",
" [End of Script]\n",
" \"\"\"\n",
" \n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4b29cb9-d8d1-413a-8152-4250e2430a42",
"metadata": {},
"outputs": [],
"source": [
"print(get_script_user_prompt('long','motivation'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1dfa60ce-9e88-4f7d-8e60-ac37a0aafc15",
"metadata": {},
"outputs": [],
"source": [
"def create_script(tone, theme, format):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": get_script_system_prompt(tone,theme,format)},\n",
" {\"role\": \"user\", \"content\": get_script_user_prompt(format,theme)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec86c436-42ae-4313-b12f-4fad42ab2227",
"metadata": {},
"outputs": [],
"source": [
"create_script('motivating','self-identity','long')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,171 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
"metadata": {},
"source": [
"# End of week 1 exercise\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1070317-3ed9-4659-abe3-828943230e03",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
"metadata": {},
"outputs": [],
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"question = \"\"\"\n",
"Please explain what this code does and why:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80d3b735-02a2-4d33-8773-05fc3d5934ef",
"metadata": {},
"outputs": [],
"source": [
"system_prompt=\"You are a helpful technical tutor who answers questions about python code, software engineering, data science and LLMs\"\n",
"user_prompt=\"Please give a detailed explanation to the following question: \" + question"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85fe8d74-762e-4fed-b326-c9a17de9d485",
"metadata": {},
"outputs": [],
"source": [
"# messages\n",
"\n",
"messages=[\n",
" {\"role\":\"system\",\"content\":system_prompt},\n",
" {\"role\":\"user\",\"content\":user_prompt}\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
"metadata": {},
"outputs": [],
"source": [
"# Get gpt-4o-mini to answer, with streaming\n",
"stream = openai.chat.completions.create(\n",
" model=MODEL_GPT,\n",
" messages=messages,\n",
" stream=True\n",
" )\n",
"response = \"\"\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"for chunk in stream:\n",
" response += chunk.choices[0].delta.content or ''\n",
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f7c8ea8-4082-4ad0-8751-3301adcf6538",
"metadata": {},
"outputs": [],
"source": [
"# Get Llama 3.2 to answer\n",
"response_llama = ollama.chat(model=MODEL_LLAMA, messages=messages)\n",
"result = response_llama['message']['content']\n",
"\n",
"display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0faaa38e-82de-473c-a5f4-1b378b08469f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,191 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "75e66023-eccf-46a9-8b70-7b21ede16ddd",
"metadata": {},
"source": [
"# End of week 1 exercise\n",
"\n",
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72d21373-edbd-4432-a29d-db8e6c9c5808",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"import ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4e4c15b-7ae8-43e9-839d-7cc49345be5a",
"metadata": {},
"outputs": [],
"source": [
"!ollama pull llama3.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7fb44166-1c65-42fc-9950-1960bc3cc432",
"metadata": {},
"outputs": [],
"source": [
"# constants\n",
"\n",
"MODEL_GPT = 'gpt-4o-mini'\n",
"MODEL_LLAMA = 'llama3.2'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58f5f1e1-5296-4631-9698-8645d4621a0c",
"metadata": {},
"outputs": [],
"source": [
"# set up environment\n",
"\n",
"# Get the openai key\n",
"\n",
"load_dotenv(override=True)\n",
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"if openai_api_key and openai_api_key.startswith('sk-proj-') and len(openai_api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
"\n",
"openai = OpenAI()\n",
"# Get the ollama key using the llama model\n",
"\n",
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12f07b33-76b9-42fa-9962-21f2a5796126",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are a knowledgeable technical instructor who helps students understand \\\n",
    "complex concepts across a wide range of technical topics. Your expertise includes artificial \\\n",
"intelligence, machine learning, large language models (LLMs), and programming in languages \\\n",
"such as Python, JavaScript, Java, and more. You also provide in-depth support for \\\n",
"AI engineering questions and other advanced technical subjects.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "330abeb7-7db2-4f23-9d19-dd698058a400",
"metadata": {},
"outputs": [],
"source": [
"# here is the question; type over this to ask something new\n",
"\n",
"question = \"\"\"\n",
"Please explain what this code does and why:\n",
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd11ad48-91ec-4cdf-9c57-99a0451e7a2f",
"metadata": {},
"outputs": [],
"source": [
"# Get gpt-4o-mini to answer, with streaming\n",
"stream_GPT = openai.chat.completions.create(\n",
" model=MODEL_GPT,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": question}\n",
" ],\n",
" stream = True\n",
" )\n",
"response_GPT = \"\"\n",
"display_handle = display(Markdown(\"\"), display_id=True)\n",
"for chunk in stream_GPT:\n",
" response_GPT += chunk.choices[0].delta.content or ''\n",
" response_GPT = response_GPT.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
" update_display(Markdown(response_GPT), display_id=display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd2527ae-0d75-4f15-a45f-92075e3059d6",
"metadata": {},
"outputs": [],
"source": [
"# Get Llama 3.2 to answer\n",
"\n",
"response_llama = ollama_via_openai.chat.completions.create(\n",
" model=MODEL_LLAMA,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": question}\n",
" ],\n",
" )\n",
"result = response_llama.choices[0].message.content\n",
"\n",
"display(Markdown(result))\n",
"\n",
"# import ollama\n",
"\n",
"# response = ollama.chat(model=MODEL_LLAMA, messages=[\n",
"# {\"role\": \"system\", \"content\": system_prompt},\n",
"# {\"role\": \"user\", \"content\": question}\n",
"# ])\n",
"# print(response['message']['content'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2747739-ba64-4067-902f-c1acc0dbdaca",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,366 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "53b9681c-896a-4e5d-b62c-44c90612e67c",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"from typing import List\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c6f1133-5c17-4ca7-819c-f64cc48212ec",
"metadata": {},
"outputs": [],
"source": [
"# Initialize constants and get api_key\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"#Check if api_key is correct\n",
"if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:\n",
" print(\"API key looks good so far\")\n",
"else:\n",
" print(\"There might be a problem with your API key? Please visit the troubleshooting notebook!\")\n",
" \n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cdb0a59-b5e1-4df5-a17e-8c36c80695b4",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
" response = requests.get(url, headers=headers)\n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]\n",
"\n",
" def get_contents(self):\n",
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
]
},
{
"cell_type": "markdown",
"id": "50d4cffe-da7a-4cab-afea-d061a1a608ac",
"metadata": {},
"source": [
"Step 1: Find relevant links to the website in order to create the brochure (Use Multi-shot prompting)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b43b4c64-bc6a-41ca-bdb9-aa714e4e794e",
"metadata": {},
"outputs": [],
"source": [
    "link_system_prompt = \"You are provided with a list of links found on a webpage like ['https://edwarddonner.com/', 'https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/?referralCode=35EB41EBB11DD247CF54&couponCode=KEEPLEARNING'] or ['https://huggingface.co/', 'https://huggingface.co/models'] \\\n",
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
"such as links to an About page, or a News page, or a Home page, or a Company page, or Careers/Jobs pages.\\n\"\n",
    "link_system_prompt += \"You should respond in JSON as in these examples:\"\n",
"link_system_prompt += \"\"\"\n",
"{\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
"}\n",
"\n",
"{\n",
" \"links\": [\n",
    "    {\"type\": \"home page\", \"url\": \"https://full.url/goes/here\"},\n",
    "    {\"type\": \"news page\", \"url\": \"https://another.full.url/news\"}\n",
" ]\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15d2870c-67ab-4aa2-89f5-04b608a9c810",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(website):\n",
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" user_prompt += \"\\n\".join(website.links)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e255be42-5e71-47ca-9275-c0cf22beeb00",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" website = Website(url)\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
" ],\n",
" response_format={\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "818b6e50-c403-42a1-8ee4-7606eaf0006f",
"metadata": {},
"outputs": [],
"source": [
"get_links('https://huggingface.co/')"
]
},
{
"cell_type": "markdown",
"id": "030ceb9b-ef71-41fd-9f23-92cb6e1d137e",
"metadata": {},
"source": [
"Step 2: Generate the brochure using the relevant links we got from OpenAI's selection"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a703230e-d57b-43a5-bdd0-e25fc2ec2e3b",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += Website(url).get_contents()\n",
" links = get_links(url)\n",
" print(\"Found links:\", links)\n",
" for link in links[\"links\"]:\n",
" result += f\"\\n\\n{link['type']}\\n\"\n",
" result += Website(link[\"url\"]).get_contents()\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74d19852-f817-4fee-a95c-35ca7a83234f",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"Include details of company culture, customers and careers/jobs if you have the information. \\\n",
"Example 1: \\\n",
"Relevant pages: \\\n",
"- https://example.com/about \\\n",
"- https://example.com/careers \\\n",
"- https://example.com/news \\\n",
"\n",
"Brochure: \\\n",
"# About ExampleCorp \\\n",
"ExampleCorp is a global leader in AI-driven logistics optimization. Founded in 2015, the company serves clients in over 30 countries... \\\n",
"\n",
"--- \\\n",
"\n",
"Example 2: \\\n",
"Relevant pages: \\\n",
"- https://techstart.io/home \\\n",
"- https://techstart.io/jobs \\\n",
"- https://techstart.io/customers \\\n",
"\n",
"Brochure: \\\n",
"# Welcome to TechStart \\\n",
"TechStart builds tools that power the future of software development. With a team-first culture and customers like Stripe, Atlassian... \\\n",
"\n",
"--- \\\n",
"\n",
"\"\"\"\n",
"\n",
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
"\n",
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
"# Include details of company culture, customers and careers/jobs if you have the information.\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2f19085-0d03-4386-b390-a38014ca6590",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ddbdea7-cf80-48d4-8bce-a11bd1a32d47",
"metadata": {},
"outputs": [],
"source": [
"def create_brochure(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" # display(Markdown(result))\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "023c1ba0-7f5a-48ac-9a98-dd184432a758",
"metadata": {},
"outputs": [],
"source": [
"create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
},
{
"cell_type": "markdown",
"id": "187651f6-d42d-405a-abed-732486161359",
"metadata": {},
"source": [
"Step 3: Translate to French"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7734915d-d38f-40ad-8335-0df39c91f6d8",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You are a translator that translates the English language to the French language \\\n",
"professionally. All you do, is first show the original version in english and then show the translate version below it in French.\\\n",
"Respond in Markdown\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29a1b40c-9040-4a3d-808b-0ca906d5cfc8",
"metadata": {},
"outputs": [],
"source": [
"def get_user_translation_prompt(company_name, url):\n",
    "    user_prompt=\"You are to translate the following brochure from the English to the French \\\n",
" language and going to display it with the English language brochure version first and then\\\n",
" the French language brochure version, don't make any changes to it, just a translation, the \\\n",
" following is the brochure:\"\n",
" user_prompt+=create_brochure(company_name, url)\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6e45b1f-3fa6-4db8-9f73-8339265502a7",
"metadata": {},
"outputs": [],
"source": [
"def translate_brochure(company_name, url):\n",
" response = openai.chat.completions.create(\n",
" model=MODEL,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_user_translation_prompt(company_name, url)}\n",
" ],\n",
" )\n",
" result = response.choices[0].message.content\n",
" display(Markdown(result))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f71c2496-76ea-4f25-9939-98ebd37cb6a6",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"translate_brochure(\"HuggingFace\", \"https://huggingface.co\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"import base64\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
"metadata": {},
"outputs": [],
"source": [
"def encode_image(image_path):\n",
"    \"\"\"Read a file and return its contents as a base64-encoded UTF-8 string.\"\"\"\n",
"    with open(image_path, 'rb') as image_file:\n",
"        raw_bytes = image_file.read()\n",
"    return base64.b64encode(raw_bytes).decode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53cca1fa-6db2-4fe4-8990-ffd98423964a",
"metadata": {},
"outputs": [],
"source": [
"# image_path = r\"C:\\Users\\LAKSHYA\\OneDrive\\Pictures\\Camera Roll\\WIN_20250614_02_46_47_Pro.jpg\"\n",
"# image_base64 = encode_image(image_path)\n",
"# print(image_base64[:100]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
"metadata": {},
"outputs": [],
"source": [
"image_list = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f8801a8-0c30-4199-a334-587096e6edeb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
"metadata": {},
"outputs": [],
"source": [
"def put_image():\n",
"    \"\"\"Prompt for an image path, base64-encode the file and add it to image_list.\n",
"\n",
"    Returns the shared image_list. A blank entry skips without adding;\n",
"    an invalid path re-prompts until a valid path or blank entry is given.\n",
"    \"\"\"\n",
"    global image_list\n",
"    # Loop instead of recursing so repeated bad paths cannot exhaust the stack\n",
"    # (the original called itself on every invalid path).\n",
"    while True:\n",
"        user_input_image = input(\"Enter image path or press enter to skip: \").strip()\n",
"\n",
"        if not user_input_image:\n",
"            print(\"No image inserted\")\n",
"            return image_list\n",
"\n",
"        image_path = os.path.normpath(user_input_image)\n",
"        if os.path.exists(image_path):\n",
"            break\n",
"        print(\"Image path not found! Try again or enter to leave blank\")\n",
"\n",
"    image_list.append(encode_image(image_path))\n",
"    return image_list\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
"metadata": {},
"outputs": [],
"source": [
"prompt= (\"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
" \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
" \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
" \"Be vivid and precise, as if you are painting a picture with words. \"\n",
" \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
" \"If the user includes a specific prompt, prioritize that in your description.)\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29494db0-4770-4689-9904-8eebc4390e7c",
"metadata": {},
"outputs": [],
"source": [
"def put_prompt():\n",
"    \"\"\"Ask the user for a prompt, append it to the running conversation.\n",
"\n",
"    Re-asks until a non-empty prompt is entered; returns the updated\n",
"    global `prompt` (full conversation so far).\n",
"    \"\"\"\n",
"    global prompt\n",
"    # Loop rather than recurse so persistent empty input cannot overflow the stack.\n",
"    user_input = input(\"Put new prompt: \")\n",
"    while not user_input:\n",
"        print(\"please enter a prompt\")\n",
"        user_input = input(\"Put new prompt: \")\n",
"    prompt += \"\\nUser: \" + user_input\n",
"    return prompt\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
"metadata": {},
"outputs": [],
"source": [
"def image_description():\n",
"    \"\"\"Gather an image and a user prompt, then stream a llava description.\n",
"\n",
"    Returns the model's full answer, or a skip message when no images exist.\n",
"    Side effect: appends the user prompt and answer to the global `prompt`\n",
"    so later calls keep conversational context.\n",
"    \"\"\"\n",
"    global prompt\n",
"\n",
"    put_image()\n",
"    if not image_list: \n",
"        return \"No images available. Skipping...\"\n",
"\n",
"    user_prompt = put_prompt()\n",
"    full_answer = \"\"\n",
"\n",
"    for chunk in ollama.generate(\n",
"        model='llava:7b-v1.6',\n",
"        prompt=user_prompt,\n",
"        images=image_list,\n",
"        stream=True\n",
"    ):\n",
"        content = chunk.get(\"response\", \"\")\n",
"        # Bug fix: print only the streamed content. The original printed a\n",
"        # 'Final Answer:' banner before every chunk, garbling the live output.\n",
"        print(content, end=\"\", flush=True)\n",
"        full_answer += content\n",
"\n",
"    prompt += \"\\nUser: \" + user_prompt + \"\\nAssistant: \" + full_answer\n",
"    return full_answer\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
"metadata": {},
"outputs": [],
"source": [
"def call_llava():\n",
"    \"\"\"Run five interactive image-description rounds with the llava model.\"\"\"\n",
"    # Cleared once per session, not per iteration: images added in earlier\n",
"    # rounds stay in image_list for later ones -- presumably intentional\n",
"    # (cumulative context), but worth confirming.\n",
"    image_list.clear()\n",
"    for i in range(5):\n",
"        print(f\"\\n Iteration {i+1}\")\n",
"        answer = image_description()\n",
"        print(\"\\n\\n Final Answer:\", answer)\n",
"    \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15518865-6c59-4029-bc2d-42d313eb78bc",
"metadata": {},
"outputs": [],
"source": [
"call_llava()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c77bd493-f893-402e-b4e3-64854e9d2e19",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,484 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
"metadata": {},
"source": [
"# How to run a cell\n",
"\n",
"Press `Shift` + `Return` to run a Cell.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os, requests, time\n",
"from dotenv import load_dotenv\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI\n",
"\n",
"# Load environment variables in a file called .env\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# Check the key\n",
"if not api_key:\n",
" print(\"No API key was found\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n",
"\n",
"# Instantiate an OpenAI object\n",
"openai = OpenAI()"
]
},
{
"cell_type": "markdown",
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
"metadata": {},
"source": [
"# Make a test call to a Frontier model (Open AI) to get started:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
"metadata": {},
"outputs": [],
"source": [
"message = \"Hello, GPT! Holla back to this space probe!\"\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=[{\"role\":\"user\", \"content\":message}])\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "markdown",
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
"metadata": {},
"source": [
"## Summarization project"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5e793b2-6775-426a-a139-4848291d0463",
"metadata": {},
"outputs": [],
"source": [
"# Some websites need proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"\"\"\"\n",
"A class to represent a Webpage\n",
"\"\"\"\n",
"class Website:\n",
"    \"\"\"A scraped webpage: exposes url, title and the visible body text.\"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        \"\"\"\n",
"        Create this Website object from the given url using the BeautifulSoup library\n",
"        \"\"\"\n",
"        self.url = url\n",
"        response = requests.get(url, headers=headers)\n",
"        soup = BeautifulSoup(response.content, 'html.parser')\n",
"        self.title = soup.title.string if soup.title else \"No title found\"\n",
"        if soup.body:\n",
"            # Drop non-content elements before extracting readable text.\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"        else:\n",
"            # Robustness: non-HTML responses or pages without a <body> used to\n",
"            # raise AttributeError here; fall back to empty text instead.\n",
"            self.text = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
"metadata": {},
"outputs": [],
"source": [
"# Summarize website content\n",
"website = Website(\"https://rwothoromo.wordpress.com/\")\n",
"# print(eli.title, \"\\n\", eli.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
"metadata": {},
"outputs": [],
"source": [
"# A system prompt tells a model like GPT4o what task they are performing and what tone they should use\n",
"# A user prompt is the conversation starter that they should reply to\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the contents of a given website, \\\n",
"and returns a brief summary, ignoring text that might be navigation-related. \\\n",
"Respond in markdown.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
"metadata": {},
"outputs": [],
"source": [
"# A function that writes a User Prompt that asks for summaries of websites:\n",
"\n",
"def user_prompt_for(website):\n",
"    \"\"\"Build the user prompt asking for a markdown summary of the given Website.\"\"\"\n",
"    parts = [\n",
"        f\"You are looking at a website titled {website.title}\",\n",
"        \"\\nThe contents of this website is as follows; \"\n",
"        \"please provide a short summary of this website in markdown. \"\n",
"        \"If it includes news or announcements, then summarize these too.\\n\\n\",\n",
"        website.text,\n",
"    ]\n",
"    return \"\".join(parts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26448ec4-5c00-4204-baec-7df91d11ff2e",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt_for(website))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
"metadata": {},
"outputs": [],
"source": [
"# The API from OpenAI expects to receive messages in a particular structure. Many of the other APIs share this structure:\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a snarky assistant\"}, # system message\n",
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}, # user message\n",
"]\n",
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
"metadata": {},
"outputs": [],
"source": [
"# To build useful messages for GPT-4o-mini\n",
"\n",
"def messages_for(website):\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
" ]\n",
"\n",
"messages_for(website)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
"metadata": {},
"outputs": [],
"source": [
"# Call the OpenAI API.\n",
"\n",
"url = \"https://rwothoromo.wordpress.com/\"\n",
"website = Website(url)\n",
"\n",
"def summarize(website):\n",
"    \"\"\"Return a GPT-4o-mini summary of the given Website object.\"\"\"\n",
"    completion = openai.chat.completions.create(\n",
"        model=\"gpt-4o-mini\",\n",
"        messages=messages_for(website)\n",
"    )\n",
"    return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
"metadata": {},
"outputs": [],
"source": [
"summarize(website)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
"metadata": {},
"outputs": [],
"source": [
"# A function to display this nicely in the Jupyter output, using markdown\n",
"\n",
"summary = summarize(website)\n",
"\n",
"def display_summary(summary):\n",
"    \"\"\"Render a markdown summary string in the notebook output.\"\"\"\n",
"    display(Markdown(summary))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
"metadata": {},
"outputs": [],
"source": [
"display_summary(summary)\n",
"# display_summary(summarize(Website(\"https://edwarddonner.com\")))\n",
"# display_summary(summarize(Website(\"https://cnn.com\")))\n",
"# display_summary(summarize(Website(\"https://anthropic.com\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a904323-acd9-4c8e-9a17-70df76184590",
"metadata": {},
"outputs": [],
"source": [
"# Websites protected with CloudFront (and similar) or with JavaScript need a Selenium or Playwright implementation. They return 403\n",
"\n",
"# display_summary(summarize(Website(\"https://openai.com\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "139ad985",
"metadata": {},
"outputs": [],
"source": [
"# To generate the above summary, use selenium\n",
"\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"\n",
"class WebsiteSelenium:\n",
"    \"\"\"A webpage fetched via Selenium headless Chrome, so JavaScript-rendered\n",
"    pages (which return 403 or empty content to plain requests) can still be\n",
"    scraped. Exposes the same attributes as Website: url, title, text.\n",
"    \"\"\"\n",
"    def __init__(self, url):\n",
"        \"\"\"Load `url` in headless Chrome and extract its title and body text.\n",
"\n",
"        On any error the defaults below are kept and the exception is printed\n",
"        (deliberately swallowed so one bad page does not stop the notebook).\n",
"        \"\"\"\n",
"        self.url = url\n",
"        self.title = \"No title found\"\n",
"        self.text = \"\"\n",
"\n",
"        # Configure Chrome options (headless mode is recommended for server environments)\n",
"        chrome_options = webdriver.ChromeOptions()\n",
"        chrome_options.add_argument(\"--headless\") # Run Chrome in headless mode (without a UI)\n",
"        chrome_options.add_argument(\"--no-sandbox\") # Required for running as root in some environments\n",
"        chrome_options.add_argument(\"--disable-dev-shm-usage\") # Overcomes limited resource problems\n",
"\n",
"        # Path to your WebDriver executable (e.g., chromedriver)\n",
"        # Make sure to replace this with the actual path to your chromedriver\n",
"        # You might need to download it from: https://chromedriver.chromium.org/downloads and place it in a drivers dir\n",
"        # NOTE(review): hardcoded relative macOS path -- will fail on other setups.\n",
"        service = Service('./drivers/chromedriver-mac-x64/chromedriver')\n",
"\n",
"        driver = None\n",
"        try:\n",
"            driver = webdriver.Chrome(service=service, options=chrome_options)\n",
"            driver.get(url)\n",
"\n",
"            # Wait for the page to load and dynamic content to render\n",
"            # You might need to adjust the wait condition based on the website\n",
"            WebDriverWait(driver, 10).until(\n",
"                EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
"            )\n",
"            time.sleep(3) # Give more time for JavaScript to execute\n",
"\n",
"            # Get the page source after dynamic content has loaded\n",
"            soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
"\n",
"            self.title = soup.title.string if soup.title else \"No title found\"\n",
"            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                irrelevant.decompose()\n",
"            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"\n",
"        except Exception as e:\n",
"            print(f\"Error accessing {url} with Selenium: {e}\")\n",
"        finally:\n",
"            if driver:\n",
"                driver.quit() # Always close the browser\n",
"\n",
"display_summary(summarize(WebsiteSelenium(\"https://openai.com\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "130d4572",
"metadata": {},
"outputs": [],
"source": [
"import asyncio\n",
"from playwright.async_api import async_playwright\n",
"import nest_asyncio\n",
"\n",
"# Apply nest_asyncio to allow asyncio.run in Jupyter\n",
"nest_asyncio.apply()\n",
"\n",
"class WebsitePlaywright:\n",
"    \"\"\"A webpage fetched via Playwright (async, headless Chromium) for sites\n",
"    that need JavaScript rendering. Exposes url, title and text like Website.\n",
"    \"\"\"\n",
"    def __init__(self, url):\n",
"        # Defaults survive if the async fetch fails.\n",
"        self.url = url\n",
"        self.title = \"No title found\"\n",
"        self.text = \"\"\n",
"        # nest_asyncio.apply() (earlier in the cell) lets asyncio.run work\n",
"        # inside Jupyter's already-running event loop.\n",
"        asyncio.run(self._fetch_content())\n",
"\n",
"    async def _fetch_content(self):\n",
"        \"\"\"Navigate to self.url, wait for rendering, then parse title and text.\n",
"\n",
"        Errors are printed rather than raised; the browser is always closed.\n",
"        \"\"\"\n",
"        async with async_playwright() as p:\n",
"            browser = None\n",
"            try:\n",
"                browser = await p.chromium.launch(headless=True)\n",
"                page = await browser.new_page()\n",
"\n",
"                # Increase timeout for navigation and other operations\n",
"                await page.goto(self.url, timeout=60000) # Wait up to 60 seconds for navigation\n",
"                print(f\"Accessing {self.url} with Playwright - goto()\")\n",
"\n",
"                # You might need to adjust or add more specific waits\n",
"                await page.wait_for_load_state('domcontentloaded', timeout=60000) # Wait for basic HTML\n",
"                # await page.wait_for_load_state('networkidle', timeout=60000) # Wait for network activity to settle\n",
"                # NOTE(review): 'div.duration-short' looks site-specific (openai.com) --\n",
"                # confirm or generalize before reusing this class on other URLs.\n",
"                await page.wait_for_selector('div.duration-short', timeout=60000) # instead of networkidle\n",
"                await page.wait_for_selector('body', timeout=60000) # Wait for the body to be present\n",
"                await asyncio.sleep(5) # Give a bit more time for final rendering\n",
"\n",
"                content = await page.content()\n",
"                soup = BeautifulSoup(content, 'html.parser')\n",
"\n",
"                self.title = soup.title.string if soup.title else \"No title found\"\n",
"                for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
"                    irrelevant.decompose()\n",
"                self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
"                print(f\"Accessed {self.url} with Playwright\")\n",
"\n",
"            except Exception as e:\n",
"                print(f\"Error accessing {self.url} with Playwright: {e}\")\n",
"            finally:\n",
"                if browser:\n",
"                    await browser.close()\n",
"\n",
"display_summary(summarize(WebsitePlaywright(\"https://openai.com/\")))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Create your prompts\n",
"\n",
"# Fixed typo in the prompt string: 'converation' -> 'conversation'.\n",
"system_prompt = \"You are a professional assistant. Review this conversation and provide a comprehensive summary. Also, suggest how much better the conversation could have gone:\"\n",
"user_prompt = \"\"\"\n",
"\n",
"Dear Email Contact,\n",
"\n",
"I hope this message finds you well.\n",
"I would like to share that I have proficiency in front-end design tools, particularly Figma, react and Angular. At this stage, I am keenly interested in finding opportunities to apply these skills professionally.\n",
"\n",
"If you are aware of any companies, projects, or platforms seeking enterprise in front-end design, I would be grateful for any advice or recommendations you might kindly provide.\n",
"\n",
"Thank you very much for your time and consideration.\n",
"\n",
"Hello Job Seeker,\n",
"\n",
"I hope you are doing well.\n",
"\n",
"The last role (3 months gig) I saw was looking for a junior PHP Developer. Does your CV include that?\n",
"\n",
"Hello Email Contact,\n",
"Thank you for your feedback.\n",
"Yes my CV has PHP as one of my skill set. Can I share it with you?\n",
"\n",
"Email Contact: They said \"It's late. Interviews were on Monday\"\n",
"\n",
"Hello Email Contact\n",
"\n",
"Thanks for the update. When you hear of any opportunity please let me know.\n",
"\n",
"Email Contact: For now, check out https://refactory.academy/courses/refactory-apprenticeship/\n",
"\"\"\"\n",
"\n",
"# Step 2: Make the messages list\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt},\n",
"]\n",
"\n",
"# Step 3: Call OpenAI\n",
"\n",
"response = openai.chat.completions.create(\n",
" model = \"gpt-4o-mini\",\n",
" messages = messages\n",
")\n",
"\n",
"# Step 4: print the result\n",
"\n",
"print(response.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b583226-9b13-4990-863a-86517a5ccfec",
"metadata": {},
"outputs": [],
"source": [
"# To perform summaries using a model running locally\n",
"import ollama\n",
"\n",
"# OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
"# HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"llama3.2\"\n",
"\n",
"def summarize_with_local_model(url):\n",
"    \"\"\"Summarize a webpage with a local Ollama model instead of OpenAI.\"\"\"\n",
"    site = Website(url)\n",
"    # Non-streaming call: block until the full completion is returned.\n",
"    response = ollama.chat(model=MODEL, messages=messages_for(site), stream=False)\n",
"    return response['message']['content']\n",
"\n",
"display(Markdown(summarize_with_local_model(\"https://rwothoromo.wordpress.com/\")))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

Some files were not shown because too many files have changed in this diff Show More