Merge pull request #734 from NatKacz99/accommodation-assistant-week1

add solutions for week1
This commit is contained in:
Ed Donner
2025-10-18 14:42:28 -04:00
committed by GitHub
2 changed files with 539 additions and 0 deletions


@@ -0,0 +1,260 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "44b58c16-8319-4095-b194-85b58928e6fd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"import re\n",
"from typing import List, Dict\n",
"from bs4 import BeautifulSoup\n",
"from openai import OpenAI\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.chrome.options import Options"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bcb4ab0-30f6-4f29-a97e-02ff6e287c37",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"MODEL = \"llama3.2\"\n",
"openai = OpenAI(base_url = \"http://localhost:11434/v1\", api_key = \"ollama\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6d30cf9-0b57-44b3-a81a-ccbd622140c3",
"metadata": {},
"outputs": [],
"source": [
"class HotelListing:\n",
" def __init__(self, name, price, url, features = None):\n",
" self.name = name\n",
" self.price = price\n",
" self.url = url\n",
" self.features = features or []\n",
" def to_dict(self):\n",
" return {\n",
" \"name\": self.name,\n",
" \"price\": self.price,\n",
" \"url\": self.url,\n",
" \"features\": self.features\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c547397-3e14-44dc-b08e-c192028d9ded",
"metadata": {},
"outputs": [],
"source": [
"class BookingParser:\n",
" def __init__(self, url, headers = None):\n",
" self.url = url\n",
" self.headers = headers or {\"User-Agent\": \"Mozilla/5.0\"}\n",
" self.listings = []\n",
" self.fetch_and_parse()\n",
"\n",
" def fetch_and_parse(self):\n",
" try:\n",
" request = requests.get(self.url, headers = self.headers, timeout = 10)\n",
" request.raise_for_status()\n",
" except Exception as e:\n",
" print(f\"Page download error: {e}\")\n",
" return\n",
"\n",
" soup = BeautifulSoup(request.content, \"html.parser\")\n",
"\n",
" hotel_cards = soup.find_all(\"div\", {\"data-stid\": \"property-listing-results\"})\n",
"\n",
" if not hotel_cards:\n",
" hotel_cards = soup.find_all(\"div\", class_ = re.compile(\"property-listing|property-card-card-results\"))\n",
"\n",
" for card in hotel_cards[:10]:\n",
" listing = self._parse_hotel_card(card)\n",
" if listing:\n",
" self.listings.append(listing)\n",
"\n",
" def _parse_hotel_card(self, card):\n",
" try:\n",
" name_element = card.find(\"a\", {\"data-stid\": \"open-hotel-information\"})\n",
" if not name_element:\n",
" name_element = card.find(\"h3\") or car.find(\"span\", class_ = re.compile(\"is-visually-hidden\"))\n",
" name = name_element.get_text(strip = True) if name_element else \"name unknown\"\n",
"\n",
" price_element = card.find(\"span\", {\"class\": \"uitk-badge-base-text\"})\n",
"\n",
" price_text = price_element.get_text(strip = True) if price_element else \"0\"\n",
" price_match = request.search(r'(\\d+)', price_text.replace('$', ''))\n",
" price = int(price_match.group(1)) if price_match else 0\n",
"\n",
" link_element = card.find(\"a\", href = True)\n",
" url = \"https://www.hotels.com\" + link_element[\"href\"] if link_element else \"\"\n",
"\n",
" features = []\n",
" feature_spans = card.select('[data-stid=\"sp-content-list\"]')\n",
" if feature_spans:\n",
" items = feature_spans[0].select('li[data-stid^=\"sp-content-item\"]')\n",
" \n",
" for item in items:\n",
" text = item.get_text(strip=True)\n",
" if text:\n",
" features.append(text.lower())\n",
"\n",
" card_text = card.get_text().lower()\n",
" if \"wi-fi\" in card_text or \"wifi\" in card_text:\n",
" features.append(\"wifi\")\n",
" if \"breakfest\" in card_text:\n",
" features.append(\"breakfest\")\n",
"\n",
" return HotelListing(name, price, url, features)\n",
" except Exception as e:\n",
" print(f\"Parsing hotel card error: {e}\")\n",
" return None\n",
"\n",
" def get_listings(self):\n",
" return [listing.to_dict() for listing in self.listings]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e700023-1f0c-4e8b-a823-c5e3ce9bfb28",
"metadata": {},
"outputs": [],
"source": [
"def make_prompt(listings: List[Dict], user_preferences: Dict):\n",
" prompt = (\n",
" \"You are an assistant and help a user in accommodation choosing.\\n\"\n",
" \"Below is a list of hotel offers and user preferences.\\n\"\n",
" \"HOTELS OFERTS:\\n\"\n",
" f\"{json.dumps(listings, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
" \"USER PREFERENCES:\\n\"\n",
" f\"{json.dumps(user_preferences, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
" \"For every ofert:\\n\"\n",
" \"1) Assess suitability in 0-10 rate (where 10 = ideal suitability)\\n\"\n",
" \"2) Give 2-3 short reasons for your assessment\\n\"\n",
" \"3) Please indicate if the price is within your budget\\n\"\n",
" \"Finally, list the TOP 3 best offers with justification.\\n\"\n",
" )\n",
" return prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58fa69bd-162b-4088-91ab-fe1fc39b4a50",
"metadata": {},
"outputs": [],
"source": [
"def analyze_listings(listings: List[Dict], preferences: Dict):\n",
" if not listings:\n",
" print(\"No offers to analyze.\")\n",
" return None\n",
"\n",
" prompt = make_prompt(listings, preferences)\n",
"\n",
" try:\n",
" response = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are an expert in choosing the best accommodation.\\n\" \n",
" \"You analyze offers and advise users.\"\n",
" },\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" )\n",
"\n",
" result = response.choices[0].message.content\n",
" return result\n",
"\n",
" except Exception as e:\n",
" print(f\"Communication error with LLM: {e}\")\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4ade5a4-3a3c-422d-9740-d3b647996222",
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
" url = (\"https://www.hotels.com/Hotel-Search?destination=Warsaw%20-%20Eastern%20Poland%2C%20Poland&d1=2025-10-18&startDate=2025-10-18&d2=2025-10-20&endDate=2025-10-20&adults=1&rooms=1&regionId=6057142&sort=RECOMMENDED&theme=&userIntent=&semdtl=&categorySearch=&useRewards=false&children=&latLong&pwaDialog=&daysInFuture&stayLength\")\n",
"\n",
" preferences = {\n",
" \"max_price\": 200,\n",
" \"must_have\": [\"wifi\", \"breakfest\"],\n",
" \"number_of_rooms\": 1,\n",
" \"localization\": \"Warsaw\"\n",
" }\n",
"\n",
" print(\"🔍 Oferts downloading from Hotels.com..\")\n",
" parser = BookingParser(url)\n",
" listings = parser.get_listings()\n",
"\n",
" print(f\"✅ Found {len(listings)} offerts\\n\")\n",
" print(\"=\"*60)\n",
"\n",
" print(\"FOUND OFFERTS:\\n\")\n",
" for i, listing in enumerate(listings, 1):\n",
" print(f\"\\n{i}. {listing['name']}\")\n",
" print(f\"Amount: {listing['price']} pln\")\n",
" print(f\"Features: {', '.join(listing['features']) if listing['features'] else 'Informations lack.'}\")\n",
"\n",
" analysis = analyze_listings(listings, preferences)\n",
"\n",
" if analysis:\n",
" print(analysis)\n",
" else:\n",
" print(\"❌ Analysis failed\")\n",
"\n",
"if __name__ == \"__main__\":\n",
" main()\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,279 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4e5da3f5-ebd0-4e20-ab89-95847187287b",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from typing import List\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display, clear_output\n",
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"import os\n",
"from scraper import fetch_website_links, fetch_website_contents"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86adec56-3b27-46da-9b1a-1e5946a76a09",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENROUTER_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"openrouter_url = \"https://openrouter.ai/api/v1\"\n",
"openai = OpenAI(api_key=api_key, base_url=openrouter_url)\n",
"MODEL = \"gpt-5-nano\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abf2f706-2709-404a-9fb7-774a9f57dd11",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"company_name = input(\"Enter the company name: \")\n",
"url = input(\"Enter the company url: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "153fa3d1-3ce5-46d0-838d-3e95a4b8628b",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
" EXAMPLE 1:\n",
" {\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
" }\n",
" EXAMPLE 2:\n",
" {\n",
" \"links\": [\n",
" {\"type\": \"company blog\", \"url\": \"https://blog.example.com\"},\n",
" {\"type\": \"our story\", \"url\": \"https://example.com/our-story\"}\n",
" ]\n",
" }\n",
" \"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6fcacc2e-7445-4d8a-aa80-489d3a2247ec",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(url):\n",
" user_prompt = f\"Here is the list of links on the website of {url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" links = fetch_website_links(url)\n",
" user_prompt += \"\\n\".join(links[:20])\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfe222c5-0d3e-4be2-85e1-596ab9d407dc",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" response = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(url)}\n",
" ],\n",
" response_format = {\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c964bdce-be5d-41c7-a8d7-8e25e58463c5",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += fetch_website_contents(url)\n",
" links = get_links(url)\n",
"\n",
" for link in links[\"links\"]:\n",
" result += f\"{link['type']}\\n\"\n",
" try:\n",
" result += f\"\\n\\n### Link: Link: {link['type']}\\n\"\n",
" result += fetch_website_contents(link[\"url\"])\n",
" except Exception as e:\n",
" print(f\"Omitted link: {link['url']}: {e}\")\n",
" continue\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5337019a-b789-49d7-bf10-0f15148c0276",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = (\n",
" \"You are an assistant that analyzes the contents of several relevant pages from a company website \"\n",
" \"and creates a great type of brochure about the company for prospective customers, investors, and recruits. \"\n",
" \"Respond in markdown. Include details of company culture, customers, and careers/jobs if you have the information. Add emoticons where ever possible.\\n\\n\"\n",
"\n",
" \"Please structure the brochure using the following sections:\\n\"\n",
" \"1. **Introduction**: A brief overview of the company.\\n\"\n",
" \"2. **Company Culture**: Emphasize fun, atmosphere, and any unique cultural elements.\\n\"\n",
" \"3. **Customers**: Mention notable customers or industries.\\n\"\n",
" \"4. **Careers/Jobs**: Highlight career opportunities.\\n\"\n",
" \"5. **Conclusion**: Wrap up with a final lighthearted message.\\n\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1dd4f2d4-8189-452a-b15a-c09ae5894ac8",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:20000]\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ab4bfef-eb22-43fb-8a46-f1f6a225793b",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure():\n",
" global brochure_text\n",
" brochure_text = \"\"\n",
"\n",
" stream = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream = True\n",
" )\n",
"\n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id = True)\n",
" for chunk in stream:\n",
" content = chunk.choices[0].delta.content or ''\n",
" response += content\n",
" brochure_text += content\n",
" response = response.replace(\"```\", \"\"). replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id = display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7828c747-7872-48e2-b3e6-faab95ba76cb",
"metadata": {},
"outputs": [],
"source": [
"def user_translate_brochure(language):\n",
" clear_output(wait = True)\n",
"\n",
" translation_stream = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"user\", \"content\": f\"Translate the following to {language}:\\n {brochure_text}\"}\n",
" ],\n",
" stream = True\n",
" )\n",
"\n",
" display_handle = display(Markdown(\"\"), display_id = True)\n",
" translated_text = \"\"\n",
"\n",
" for chunk in translation_stream:\n",
" content = chunk.choices[0].delta.content or \"\"\n",
" if content:\n",
" translated_text += content\n",
" update_display(Markdown(translated_text), display_id = display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6cfa92a-8a86-485d-a7e1-1651705ee6dc",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure()\n",
"language_choice = input(\"Enter the language to translate the brochure into (e.g., 'French'): \")\n",
"user_translate_brochure(language_choice)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}