Merge pull request #734 from NatKacz99/accommodation-assistant-week1

add solutions for week1
This commit is contained in:
Ed Donner
2025-10-18 14:42:28 -04:00
committed by GitHub
2 changed files with 539 additions and 0 deletions


@@ -0,0 +1,260 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "44b58c16-8319-4095-b194-85b58928e6fd",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import json\n",
"import re\n",
"from typing import List, Dict\n",
"from bs4 import BeautifulSoup\n",
"from openai import OpenAI\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service\n",
"from selenium.webdriver.chrome.options import Options"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bcb4ab0-30f6-4f29-a97e-02ff6e287c37",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"MODEL = \"llama3.2\"\n",
"openai = OpenAI(base_url = \"http://localhost:11434/v1\", api_key = \"ollama\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6d30cf9-0b57-44b3-a81a-ccbd622140c3",
"metadata": {},
"outputs": [],
"source": [
"class HotelListing:\n",
" def __init__(self, name, price, url, features = None):\n",
" self.name = name\n",
" self.price = price\n",
" self.url = url\n",
" self.features = features or []\n",
" def to_dict(self):\n",
" return {\n",
" \"name\": self.name,\n",
" \"price\": self.price,\n",
" \"url\": self.url,\n",
" \"features\": self.features\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c547397-3e14-44dc-b08e-c192028d9ded",
"metadata": {},
"outputs": [],
"source": [
"class BookingParser:\n",
" def __init__(self, url, headers = None):\n",
" self.url = url\n",
" self.headers = headers or {\"User-Agent\": \"Mozilla/5.0\"}\n",
" self.listings = []\n",
" self.fetch_and_parse()\n",
"\n",
" def fetch_and_parse(self):\n",
" try:\n",
" request = requests.get(self.url, headers = self.headers, timeout = 10)\n",
" request.raise_for_status()\n",
" except Exception as e:\n",
" print(f\"Page download error: {e}\")\n",
" return\n",
"\n",
" soup = BeautifulSoup(request.content, \"html.parser\")\n",
"\n",
" hotel_cards = soup.find_all(\"div\", {\"data-stid\": \"property-listing-results\"})\n",
"\n",
" if not hotel_cards:\n",
" hotel_cards = soup.find_all(\"div\", class_ = re.compile(\"property-listing|property-card-card-results\"))\n",
"\n",
" for card in hotel_cards[:10]:\n",
" listing = self._parse_hotel_card(card)\n",
" if listing:\n",
" self.listings.append(listing)\n",
"\n",
" def _parse_hotel_card(self, card):\n",
" try:\n",
" name_element = card.find(\"a\", {\"data-stid\": \"open-hotel-information\"})\n",
" if not name_element:\n",
" name_element = card.find(\"h3\") or car.find(\"span\", class_ = re.compile(\"is-visually-hidden\"))\n",
" name = name_element.get_text(strip = True) if name_element else \"name unknown\"\n",
"\n",
" price_element = card.find(\"span\", {\"class\": \"uitk-badge-base-text\"})\n",
"\n",
" price_text = price_element.get_text(strip = True) if price_element else \"0\"\n",
" price_match = request.search(r'(\\d+)', price_text.replace('$', ''))\n",
" price = int(price_match.group(1)) if price_match else 0\n",
"\n",
" link_element = card.find(\"a\", href = True)\n",
" url = \"https://www.hotels.com\" + link_element[\"href\"] if link_element else \"\"\n",
"\n",
" features = []\n",
" feature_spans = card.select('[data-stid=\"sp-content-list\"]')\n",
" if feature_spans:\n",
" items = feature_spans[0].select('li[data-stid^=\"sp-content-item\"]')\n",
" \n",
" for item in items:\n",
" text = item.get_text(strip=True)\n",
" if text:\n",
" features.append(text.lower())\n",
"\n",
" card_text = card.get_text().lower()\n",
" if \"wi-fi\" in card_text or \"wifi\" in card_text:\n",
" features.append(\"wifi\")\n",
" if \"breakfest\" in card_text:\n",
" features.append(\"breakfest\")\n",
"\n",
" return HotelListing(name, price, url, features)\n",
" except Exception as e:\n",
" print(f\"Parsing hotel card error: {e}\")\n",
" return None\n",
"\n",
" def get_listings(self):\n",
" return [listing.to_dict() for listing in self.listings]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e700023-1f0c-4e8b-a823-c5e3ce9bfb28",
"metadata": {},
"outputs": [],
"source": [
"def make_prompt(listings: List[Dict], user_preferences: Dict):\n",
" prompt = (\n",
" \"You are an assistant and help a user in accommodation choosing.\\n\"\n",
" \"Below is a list of hotel offers and user preferences.\\n\"\n",
" \"HOTELS OFERTS:\\n\"\n",
" f\"{json.dumps(listings, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
" \"USER PREFERENCES:\\n\"\n",
" f\"{json.dumps(user_preferences, ensure_ascii = False, indent = 1)}\\n\\n\"\n",
" \"For every ofert:\\n\"\n",
" \"1) Assess suitability in 0-10 rate (where 10 = ideal suitability)\\n\"\n",
" \"2) Give 2-3 short reasons for your assessment\\n\"\n",
" \"3) Please indicate if the price is within your budget\\n\"\n",
" \"Finally, list the TOP 3 best offers with justification.\\n\"\n",
" )\n",
" return prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58fa69bd-162b-4088-91ab-fe1fc39b4a50",
"metadata": {},
"outputs": [],
"source": [
"def analyze_listings(listings: List[Dict], preferences: Dict):\n",
" if not listings:\n",
" print(\"No offers to analyze.\")\n",
" return None\n",
"\n",
" prompt = make_prompt(listings, preferences)\n",
"\n",
" try:\n",
" response = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are an expert in choosing the best accommodation.\\n\" \n",
" \"You analyze offers and advise users.\"\n",
" },\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" )\n",
"\n",
" result = response.choices[0].message.content\n",
" return result\n",
"\n",
" except Exception as e:\n",
" print(f\"Communication error with LLM: {e}\")\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4ade5a4-3a3c-422d-9740-d3b647996222",
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
" url = (\"https://www.hotels.com/Hotel-Search?destination=Warsaw%20-%20Eastern%20Poland%2C%20Poland&d1=2025-10-18&startDate=2025-10-18&d2=2025-10-20&endDate=2025-10-20&adults=1&rooms=1&regionId=6057142&sort=RECOMMENDED&theme=&userIntent=&semdtl=&categorySearch=&useRewards=false&children=&latLong&pwaDialog=&daysInFuture&stayLength\")\n",
"\n",
" preferences = {\n",
" \"max_price\": 200,\n",
" \"must_have\": [\"wifi\", \"breakfest\"],\n",
" \"number_of_rooms\": 1,\n",
" \"localization\": \"Warsaw\"\n",
" }\n",
"\n",
" print(\"🔍 Oferts downloading from Hotels.com..\")\n",
" parser = BookingParser(url)\n",
" listings = parser.get_listings()\n",
"\n",
" print(f\"✅ Found {len(listings)} offerts\\n\")\n",
" print(\"=\"*60)\n",
"\n",
" print(\"FOUND OFFERTS:\\n\")\n",
" for i, listing in enumerate(listings, 1):\n",
" print(f\"\\n{i}. {listing['name']}\")\n",
" print(f\"Amount: {listing['price']} pln\")\n",
" print(f\"Features: {', '.join(listing['features']) if listing['features'] else 'Informations lack.'}\")\n",
"\n",
" analysis = analyze_listings(listings, preferences)\n",
"\n",
" if analysis:\n",
" print(analysis)\n",
" else:\n",
" print(\"❌ Analysis failed\")\n",
"\n",
"if __name__ == \"__main__\":\n",
" main()\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -0,0 +1,279 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4e5da3f5-ebd0-4e20-ab89-95847187287b",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from typing import List\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display, clear_output\n",
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"import os\n",
"from scraper import fetch_website_links, fetch_website_contents"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86adec56-3b27-46da-9b1a-1e5946a76a09",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENROUTER_API_KEY')\n",
"\n",
"# Check the key\n",
"\n",
"if not api_key:\n",
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
"elif api_key.strip() != api_key:\n",
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
"else:\n",
" print(\"API key found and looks good so far!\")\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"openrouter_url = \"https://openrouter.ai/api/v1\"\n",
"openai = OpenAI(api_key=api_key, base_url=openrouter_url)\n",
"MODEL = \"gpt-5-nano\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abf2f706-2709-404a-9fb7-774a9f57dd11",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"company_name = input(\"Enter the company name: \")\n",
"url = input(\"Enter the company url: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "153fa3d1-3ce5-46d0-838d-3e95a4b8628b",
"metadata": {},
"outputs": [],
"source": [
"link_system_prompt = \"You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
"link_system_prompt += \"\"\"\n",
" EXAMPLE 1:\n",
" {\n",
" \"links\": [\n",
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
" ]\n",
" }\n",
" EXAMPLE 2:\n",
" {\n",
" \"links\": [\n",
" {\"type\": \"company blog\", \"url\": \"https://blog.example.com\"},\n",
" {\"type\": \"our story\", \"url\": \"https://example.com/our-story\"}\n",
" ]\n",
" }\n",
" \"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6fcacc2e-7445-4d8a-aa80-489d3a2247ec",
"metadata": {},
"outputs": [],
"source": [
"def get_links_user_prompt(url):\n",
" user_prompt = f\"Here is the list of links on the website of {url} - \"\n",
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\\n\"\n",
" user_prompt += \"Links (some might be relative links):\\n\"\n",
" links = fetch_website_links(url)\n",
" user_prompt += \"\\n\".join(links[:20])\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfe222c5-0d3e-4be2-85e1-596ab9d407dc",
"metadata": {},
"outputs": [],
"source": [
"def get_links(url):\n",
" response = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
" {\"role\": \"user\", \"content\": get_links_user_prompt(url)}\n",
" ],\n",
" response_format = {\"type\": \"json_object\"}\n",
" )\n",
" result = response.choices[0].message.content\n",
" return json.loads(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c964bdce-be5d-41c7-a8d7-8e25e58463c5",
"metadata": {},
"outputs": [],
"source": [
"def get_all_details(url):\n",
" result = \"Landing page:\\n\"\n",
" result += fetch_website_contents(url)\n",
" links = get_links(url)\n",
"\n",
" for link in links[\"links\"]:\n",
" result += f\"{link['type']}\\n\"\n",
" try:\n",
" result += f\"\\n\\n### Link: Link: {link['type']}\\n\"\n",
" result += fetch_website_contents(link[\"url\"])\n",
" except Exception as e:\n",
" print(f\"Omitted link: {link['url']}: {e}\")\n",
" continue\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5337019a-b789-49d7-bf10-0f15148c0276",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = (\n",
" \"You are an assistant that analyzes the contents of several relevant pages from a company website \"\n",
" \"and creates a great type of brochure about the company for prospective customers, investors, and recruits. \"\n",
" \"Respond in markdown. Include details of company culture, customers, and careers/jobs if you have the information. Add emoticons where ever possible.\\n\\n\"\n",
"\n",
" \"Please structure the brochure using the following sections:\\n\"\n",
" \"1. **Introduction**: A brief overview of the company.\\n\"\n",
" \"2. **Company Culture**: Emphasize fun, atmosphere, and any unique cultural elements.\\n\"\n",
" \"3. **Customers**: Mention notable customers or industries.\\n\"\n",
" \"4. **Careers/Jobs**: Highlight career opportunities.\\n\"\n",
" \"5. **Conclusion**: Wrap up with a final lighthearted message.\\n\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1dd4f2d4-8189-452a-b15a-c09ae5894ac8",
"metadata": {},
"outputs": [],
"source": [
"def get_brochure_user_prompt(company_name, url):\n",
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
" user_prompt += get_all_details(url)\n",
" user_prompt = user_prompt[:20000]\n",
" return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ab4bfef-eb22-43fb-8a46-f1f6a225793b",
"metadata": {},
"outputs": [],
"source": [
"def stream_brochure():\n",
" global brochure_text\n",
" brochure_text = \"\"\n",
"\n",
" stream = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
" ],\n",
" stream = True\n",
" )\n",
"\n",
" response = \"\"\n",
" display_handle = display(Markdown(\"\"), display_id = True)\n",
" for chunk in stream:\n",
" content = chunk.choices[0].delta.content or ''\n",
" response += content\n",
" brochure_text += content\n",
" response = response.replace(\"```\", \"\"). replace(\"markdown\", \"\")\n",
" update_display(Markdown(response), display_id = display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7828c747-7872-48e2-b3e6-faab95ba76cb",
"metadata": {},
"outputs": [],
"source": [
"def user_translate_brochure(language):\n",
" clear_output(wait = True)\n",
"\n",
" translation_stream = openai.chat.completions.create(\n",
" model = MODEL,\n",
" messages = [\n",
" {\"role\": \"user\", \"content\": f\"Translate the following to {language}:\\n {brochure_text}\"}\n",
" ],\n",
" stream = True\n",
" )\n",
"\n",
" display_handle = display(Markdown(\"\"), display_id = True)\n",
" translated_text = \"\"\n",
"\n",
" for chunk in translation_stream:\n",
" content = chunk.choices[0].delta.content or \"\"\n",
" if content:\n",
" translated_text += content\n",
" update_display(Markdown(translated_text), display_id = display_handle.display_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6cfa92a-8a86-485d-a7e1-1651705ee6dc",
"metadata": {},
"outputs": [],
"source": [
"stream_brochure()\n",
"language_choice = input(\"Enter the language to translate the brochure into (e.g., 'French'): \")\n",
"user_translate_brochure(language_choice)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}