Files
LLM_Engineering_OLD/week8/community_contributions/w8d5/helpers/travel_deals.py
2025-10-29 13:04:37 +05:00

68 lines
1.7 KiB
Python

from pydantic import BaseModel
from typing import List, Dict, Self
from bs4 import BeautifulSoup
import re
import feedparser
from tqdm import tqdm
import requests
import time
# RSS/Atom feed URLs that ScrapedTravelDeal.fetch() iterates over.
# Append further feed URLs here, one per line.
feeds = [
    "https://thepointsguy.com/feed/",
]
def extract(html_snippet: str) -> str:
    """Return the visible text of an HTML fragment as one line of plain text.

    Args:
        html_snippet: HTML markup (for example an RSS entry summary). Empty
            or ``None`` input yields an empty string.

    Returns:
        The text content with tags removed, whitespace runs collapsed to a
        single space, and leading/trailing whitespace stripped.
    """
    if not html_snippet:
        return ""
    soup = BeautifulSoup(html_snippet, "html.parser")
    # Join text nodes with a space: get_text(strip=True) on its own glues the
    # stripped pieces together, so "The post <a>X</a> appeared" would come
    # out as "The postXappeared".
    text = soup.get_text(separator=" ")
    # Remove anything that still looks like a tag after parsing (e.g. markup
    # that was entity-escaped inside the snippet).
    text = re.sub(r"<[^<]+?>", "", text)
    # Collapse newlines and repeated whitespace into single spaces.
    return re.sub(r"\s+", " ", text).strip()
class ScrapedTravelDeal:
title: str
summary: str
url: str
details: str
def __init__(self, entry: Dict[str, str]):
self.title = entry.get('title', '')
summary_text = entry.get('summary', entry.get('description', ''))
self.summary = extract(summary_text)
self.url = entry.get('link', '')
self.details = self.summary
def __repr__(self):
return f"<{self.title}>"
def describe(self):
return f"Title: {self.title}\nDetails: {self.details.strip()}\nURL: {self.url}"
@classmethod
def fetch(cls, show_progress: bool = False) -> List[Self]:
deals = []
feed_iter = tqdm(feeds) if show_progress else feeds
for feed_url in feed_iter:
try:
feed = feedparser.parse(feed_url)
for entry in feed.entries[:10]:
deals.append(cls(entry))
time.sleep(0.3)
except Exception as e:
print(f"Error fetching {feed_url}: {e}")
return deals
class TravelDeal(BaseModel):
    # Pydantic model describing one structured travel deal.
    # NOTE(review): comments are used instead of a docstring on purpose --
    # a class docstring would be emitted as the schema description.
    destination: str  # destination of the deal, as free text
    deal_type: str  # kind of deal; allowed values are not constrained here -- TODO confirm
    description: str  # free-text description of the deal
    price: float  # deal price; currency/units not specified in this file -- TODO confirm
    url: str  # link to the deal
class TravelDealSelection(BaseModel):
    # Pydantic wrapper model holding a list of TravelDeal objects.
    # NOTE(review): presumably the container type for a batch of selected
    # deals -- verify against callers.
    deals: List[TravelDeal]  # the deals in this selection
class TravelOpportunity(BaseModel):
    # Pydantic model pairing a TravelDeal with two numeric figures.
    deal: TravelDeal  # the deal this opportunity refers to
    estimate: float  # presumably an estimated value/price for the deal -- TODO confirm
    discount: float  # presumably estimate minus deal.price -- verify against the code that builds it