{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "44b58c16-8319-4095-b194-85b58928e6fd", "metadata": {}, "outputs": [], "source": [ "import os\n", "import requests\n", "import json\n", "import re\n", "from typing import List, Dict\n", "from bs4 import BeautifulSoup\n", "from openai import OpenAI\n", "from selenium import webdriver\n", "from selenium.webdriver.chrome.service import Service\n", "from selenium.webdriver.chrome.options import Options" ] }, { "cell_type": "code", "execution_count": null, "id": "5bcb4ab0-30f6-4f29-a97e-02ff6e287c37", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "MODEL = \"llama3.2\"\n", "openai = OpenAI(base_url = \"http://localhost:11434/v1\", api_key = \"ollama\")" ] }, { "cell_type": "code", "execution_count": null, "id": "c6d30cf9-0b57-44b3-a81a-ccbd622140c3", "metadata": {}, "outputs": [], "source": [ "class HotelListing:\n", " def __init__(self, name, price, url, features = None):\n", " self.name = name\n", " self.price = price\n", " self.url = url\n", " self.features = features or []\n", " def to_dict(self):\n", " return {\n", " \"name\": self.name,\n", " \"price\": self.price,\n", " \"url\": self.url,\n", " \"features\": self.features\n", " }" ] }, { "cell_type": "code", "execution_count": null, "id": "3c547397-3e14-44dc-b08e-c192028d9ded", "metadata": {}, "outputs": [], "source": [ "class BookingParser:\n", " def __init__(self, url, headers = None):\n", " self.url = url\n", " self.headers = headers or {\"User-Agent\": \"Mozilla/5.0\"}\n", " self.listings = []\n", " self.fetch_and_parse()\n", "\n", " def fetch_and_parse(self):\n", " try:\n", " request = requests.get(self.url, headers = self.headers, timeout = 10)\n", " request.raise_for_status()\n", " except Exception as e:\n", " print(f\"Page download error: {e}\")\n", " return\n", "\n", " soup = BeautifulSoup(request.content, \"html.parser\")\n", "\n", " hotel_cards = soup.find_all(\"div\", {\"data-stid\": \"property-listing-results\"})\n", "\n", " if not hotel_cards:\n", " hotel_cards = soup.find_all(\"div\", class_ = re.compile(\"property-listing|property-card-card-results\"))\n", "\n", " for card in hotel_cards[:10]:\n", " listing = self._parse_hotel_card(card)\n", " if listing:\n", " self.listings.append(listing)\n", "\n", " def _parse_hotel_card(self, card):\n", " try:\n", " name_element = card.find(\"a\", {\"data-stid\": \"open-hotel-information\"})\n", " if not name_element:\n", " name_element = card.find(\"h3\") or car.find(\"span\", class_ = re.compile(\"is-visually-hidden\"))\n", " name = name_element.get_text(strip = True) if name_element else \"name unknown\"\n", "\n", " price_element = card.find(\"span\", {\"class\": \"uitk-badge-base-text\"})\n", "\n", " price_text = price_element.get_text(strip = True) if price_element else \"0\"\n", " price_match = request.search(r'(\\d+)', price_text.replace('$', ''))\n", " price = int(price_match.group(1)) if price_match else 0\n", "\n", " link_element = card.find(\"a\", href = True)\n", " url = \"https://www.hotels.com\" + link_element[\"href\"] if link_element else \"\"\n", "\n", " features = []\n", " feature_spans = card.select('[data-stid=\"sp-content-list\"]')\n", " if feature_spans:\n", " items = feature_spans[0].select('li[data-stid^=\"sp-content-item\"]')\n", " \n", " for item in items:\n", " text = item.get_text(strip=True)\n", " if text:\n", " features.append(text.lower())\n", "\n", " card_text = card.get_text().lower()\n", " if \"wi-fi\" in card_text or \"wifi\" in card_text:\n", " features.append(\"wifi\")\n", " if \"breakfest\" in card_text:\n", " features.append(\"breakfest\")\n", "\n", " return HotelListing(name, price, url, features)\n", " except Exception as e:\n", " print(f\"Parsing hotel card error: {e}\")\n", " return None\n", "\n", " def get_listings(self):\n", " return [listing.to_dict() for listing in self.listings]\n", " " ] }, { "cell_type": "code", "execution_count": null, "id": "9e700023-1f0c-4e8b-a823-c5e3ce9bfb28", "metadata": {}, "outputs": [], "source": [ "def make_prompt(listings: List[Dict], user_preferences: Dict):\n", " prompt = (\n", " \"You are an assistant and help a user in accommodation choosing.\\n\"\n", " \"Below is a list of hotel offers and user preferences.\\n\"\n", " \"HOTELS OFERTS:\\n\"\n", " f\"{json.dumps(listings, ensure_ascii = False, indent = 1)}\\n\\n\"\n", " \"USER PREFERENCES:\\n\"\n", " f\"{json.dumps(user_preferences, ensure_ascii = False, indent = 1)}\\n\\n\"\n", " \"For every ofert:\\n\"\n", " \"1) Assess suitability in 0-10 rate (where 10 = ideal suitability)\\n\"\n", " \"2) Give 2-3 short reasons for your assessment\\n\"\n", " \"3) Please indicate if the price is within your budget\\n\"\n", " \"Finally, list the TOP 3 best offers with justification.\\n\"\n", " )\n", " return prompt" ] }, { "cell_type": "code", "execution_count": null, "id": "58fa69bd-162b-4088-91ab-fe1fc39b4a50", "metadata": {}, "outputs": [], "source": [ "def analyze_listings(listings: List[Dict], preferences: Dict):\n", " if not listings:\n", " print(\"No offers to analyze.\")\n", " return None\n", "\n", " prompt = make_prompt(listings, preferences)\n", "\n", " try:\n", " response = openai.chat.completions.create(\n", " model = MODEL,\n", " messages = [\n", " {\n", " \"role\": \"system\",\n", " \"content\": \"You are an expert in choosing the best accommodation.\\n\" \n", " \"You analyze offers and advise users.\"\n", " },\n", " {\"role\": \"user\", \"content\": prompt}\n", " ]\n", " )\n", "\n", " result = response.choices[0].message.content\n", " return result\n", "\n", " except Exception as e:\n", " print(f\"Communication error with LLM: {e}\")\n", " return None" ] }, { "cell_type": "code", "execution_count": null, "id": "b4ade5a4-3a3c-422d-9740-d3b647996222", "metadata": {}, "outputs": [], "source": [ "def main():\n", " url = (\"https://www.hotels.com/Hotel-Search?destination=Warsaw%20-%20Eastern%20Poland%2C%20Poland&d1=2025-10-18&startDate=2025-10-18&d2=2025-10-20&endDate=2025-10-20&adults=1&rooms=1®ionId=6057142&sort=RECOMMENDED&theme=&userIntent=&semdtl=&categorySearch=&useRewards=false&children=&latLong&pwaDialog=&daysInFuture&stayLength\")\n", "\n", " preferences = {\n", " \"max_price\": 200,\n", " \"must_have\": [\"wifi\", \"breakfest\"],\n", " \"number_of_rooms\": 1,\n", " \"localization\": \"Warsaw\"\n", " }\n", "\n", " print(\"🔍 Oferts downloading from Hotels.com..\")\n", " parser = BookingParser(url)\n", " listings = parser.get_listings()\n", "\n", " print(f\"✅ Found {len(listings)} offerts\\n\")\n", " print(\"=\"*60)\n", "\n", " print(\"FOUND OFFERTS:\\n\")\n", " for i, listing in enumerate(listings, 1):\n", " print(f\"\\n{i}. {listing['name']}\")\n", " print(f\"Amount: {listing['price']} pln\")\n", " print(f\"Features: {', '.join(listing['features']) if listing['features'] else 'Informations lack.'}\")\n", "\n", " analysis = analyze_listings(listings, preferences)\n", "\n", " if analysis:\n", " print(analysis)\n", " else:\n", " print(\"❌ Analysis failed\")\n", "\n", "if __name__ == \"__main__\":\n", " main()\n", " " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 5 }