261 lines
9.3 KiB
Plaintext
261 lines
9.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "44b58c16-8319-4095-b194-85b58928e6fd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import requests\n",
|
|
"import json\n",
|
|
"import re\n",
|
|
"from typing import List, Dict\n",
|
|
"from bs4 import BeautifulSoup\n",
|
|
"from openai import OpenAI\n",
|
|
"from selenium import webdriver\n",
|
|
"from selenium.webdriver.chrome.service import Service\n",
|
|
"from selenium.webdriver.chrome.options import Options"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5bcb4ab0-30f6-4f29-a97e-02ff6e287c37",
|
|
"metadata": {
|
|
"editable": true,
|
|
"slideshow": {
|
|
"slide_type": ""
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"MODEL = \"llama3.2\"\n",
|
|
"openai = OpenAI(base_url = \"http://localhost:11434/v1\", api_key = \"ollama\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c6d30cf9-0b57-44b3-a81a-ccbd622140c3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class HotelListing:\n",
|
|
" def __init__(self, name, price, url, features = None):\n",
|
|
" self.name = name\n",
|
|
" self.price = price\n",
|
|
" self.url = url\n",
|
|
" self.features = features or []\n",
|
|
" def to_dict(self):\n",
|
|
" return {\n",
|
|
" \"name\": self.name,\n",
|
|
" \"price\": self.price,\n",
|
|
" \"url\": self.url,\n",
|
|
" \"features\": self.features\n",
|
|
" }"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3c547397-3e14-44dc-b08e-c192028d9ded",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class BookingParser:\n",
|
|
" def __init__(self, url, headers = None):\n",
|
|
" self.url = url\n",
|
|
" self.headers = headers or {\"User-Agent\": \"Mozilla/5.0\"}\n",
|
|
" self.listings = []\n",
|
|
" self.fetch_and_parse()\n",
|
|
"\n",
|
|
" def fetch_and_parse(self):\n",
|
|
" try:\n",
|
|
" request = requests.get(self.url, headers = self.headers, timeout = 10)\n",
|
|
" request.raise_for_status()\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Page download error: {e}\")\n",
|
|
" return\n",
|
|
"\n",
|
|
" soup = BeautifulSoup(request.content, \"html.parser\")\n",
|
|
"\n",
|
|
" hotel_cards = soup.find_all(\"div\", {\"data-stid\": \"property-listing-results\"})\n",
|
|
"\n",
|
|
" if not hotel_cards:\n",
|
|
" hotel_cards = soup.find_all(\"div\", class_ = re.compile(\"property-listing|property-card-card-results\"))\n",
|
|
"\n",
|
|
" for card in hotel_cards[:10]:\n",
|
|
" listing = self._parse_hotel_card(card)\n",
|
|
" if listing:\n",
|
|
" self.listings.append(listing)\n",
|
|
"\n",
|
|
" def _parse_hotel_card(self, card):\n",
|
|
" try:\n",
|
|
" name_element = card.find(\"a\", {\"data-stid\": \"open-hotel-information\"})\n",
|
|
" if not name_element:\n",
|
|
" name_element = card.find(\"h3\") or card.find(\"span\", class_ = re.compile(\"is-visually-hidden\"))\n",
|
|
" name = name_element.get_text(strip = True) if name_element else \"name unknown\"\n",
|
|
"\n",
|
|
" price_element = card.find(\"span\", {\"class\": \"uitk-badge-base-text\"})\n",
|
|
"\n",
|
|
" price_text = price_element.get_text(strip = True) if price_element else \"0\"\n",
|
|
" price_match = re.search(r'(\\d+)', price_text.replace('$', ''))\n",
|
|
" price = int(price_match.group(1)) if price_match else 0\n",
|
|
"\n",
|
|
" link_element = card.find(\"a\", href = True)\n",
|
|
" url = \"https://www.hotels.com\" + link_element[\"href\"] if link_element else \"\"\n",
|
|
"\n",
|
|
" features = []\n",
|
|
" feature_spans = card.select('[data-stid=\"sp-content-list\"]')\n",
|
|
" if feature_spans:\n",
|
|
" items = feature_spans[0].select('li[data-stid^=\"sp-content-item\"]')\n",
|
|
" \n",
|
|
" for item in items:\n",
|
|
" text = item.get_text(strip=True)\n",
|
|
" if text:\n",
|
|
" features.append(text.lower())\n",
|
|
"\n",
|
|
" card_text = card.get_text().lower()\n",
|
|
" if \"wi-fi\" in card_text or \"wifi\" in card_text:\n",
|
|
" features.append(\"wifi\")\n",
|
|
" if \"breakfast\" in card_text:\n",
|
|
" features.append(\"breakfest\")\n",
|
|
"\n",
|
|
" return HotelListing(name, price, url, features)\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Parsing hotel card error: {e}\")\n",
|
|
" return None\n",
|
|
"\n",
|
|
" def get_listings(self):\n",
|
|
" return [listing.to_dict() for listing in self.listings]\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9e700023-1f0c-4e8b-a823-c5e3ce9bfb28",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def make_prompt(listings: List[Dict], user_preferences: Dict):\n",
|
|
" prompt = (\n",
|
|
" \"You are an assistant helping a user choose accommodation.\\n\"\n",
|
|
" \"Below is a list of hotel offers and user preferences.\\n\"\n",
|
|
" \"HOTEL OFFERS:\\n\"\n",
|
|
" f\"{json.dumps(listings, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
|
|
" \"USER PREFERENCES:\\n\"\n",
|
|
" f\"{json.dumps(user_preferences, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
|
|
" \"For every offer:\\n\"\n",
|
|
" \"1) Rate suitability on a 0-10 scale (where 10 = ideal suitability)\\n\"\n",
|
|
" \"2) Give 2-3 short reasons for your assessment\\n\"\n",
|
|
" \"3) Indicate whether the price is within the user's budget\\n\"\n",
|
|
" \"Finally, list the TOP 3 best offers with justification.\\n\"\n",
|
|
" )\n",
|
|
" return prompt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "58fa69bd-162b-4088-91ab-fe1fc39b4a50",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def analyze_listings(listings: List[Dict], preferences: Dict):\n",
|
|
" if not listings:\n",
|
|
" print(\"No offers to analyze.\")\n",
|
|
" return None\n",
|
|
"\n",
|
|
" prompt = make_prompt(listings, preferences)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" response = openai.chat.completions.create(\n",
|
|
" model = MODEL,\n",
|
|
" messages = [\n",
|
|
" {\n",
|
|
" \"role\": \"system\",\n",
|
|
" \"content\": \"You are an expert in choosing the best accommodation.\\n\" \n",
|
|
" \"You analyze offers and advise users.\"\n",
|
|
" },\n",
|
|
" {\"role\": \"user\", \"content\": prompt}\n",
|
|
" ]\n",
|
|
" )\n",
|
|
"\n",
|
|
" result = response.choices[0].message.content\n",
|
|
" return result\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Communication error with LLM: {e}\")\n",
|
|
" return None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b4ade5a4-3a3c-422d-9740-d3b647996222",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def main():\n",
|
|
" url = (\"https://www.hotels.com/Hotel-Search?destination=Warsaw%20-%20Eastern%20Poland%2C%20Poland&d1=2025-10-18&startDate=2025-10-18&d2=2025-10-20&endDate=2025-10-20&adults=1&rooms=1&regionId=6057142&sort=RECOMMENDED&theme=&userIntent=&semdtl=&categorySearch=&useRewards=false&children=&latLong&pwaDialog=&daysInFuture&stayLength\")\n",
|
|
"\n",
|
|
" preferences = {\n",
|
|
" \"max_price\": 200,\n",
|
|
" \"must_have\": [\"wifi\", \"breakfest\"],\n",
|
|
" \"number_of_rooms\": 1,\n",
|
|
" \"localization\": \"Warsaw\"\n",
|
|
" }\n",
|
|
"\n",
|
|
" print(\"🔍 Downloading offers from Hotels.com...\")\n",
|
|
" parser = BookingParser(url)\n",
|
|
" listings = parser.get_listings()\n",
|
|
"\n",
|
|
" print(f\"✅ Found {len(listings)} offers\\n\")\n",
|
|
" print(\"=\"*60)\n",
|
|
"\n",
|
|
" print(\"FOUND OFFERS:\\n\")\n",
|
|
" for i, listing in enumerate(listings, 1):\n",
|
|
" print(f\"\\n{i}. {listing['name']}\")\n",
|
|
" print(f\"Amount: {listing['price']} pln\")\n",
|
|
" print(f\"Features: {', '.join(listing['features']) if listing['features'] else 'No information available.'}\")\n",
|
|
"\n",
|
|
" analysis = analyze_listings(listings, preferences)\n",
|
|
"\n",
|
|
" if analysis:\n",
|
|
" print(analysis)\n",
|
|
" else:\n",
|
|
" print(\"❌ Analysis failed\")\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" main()\n",
|
|
" "
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|