68 lines
1.7 KiB
Python
68 lines
1.7 KiB
Python
from pydantic import BaseModel
|
|
from typing import List, Dict, Self
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
import feedparser
|
|
from tqdm import tqdm
|
|
import requests
|
|
import time
|
|
|
|
# RSS feed URLs polled for travel deals; ScrapedTravelDeal.fetch passes each
# one to feedparser.parse.
feeds = ["https://thepointsguy.com/feed/"]
|
|
|
|
def extract(html_snippet: str) -> str:
    """Return the visible text of an HTML fragment as a single line.

    Args:
        html_snippet: HTML markup (for example an RSS ``summary`` field).
            ``None`` or an empty string yields ``""``.

    Returns:
        The text content with markup removed, adjacent text fragments
        separated by one space, and all runs of whitespace collapsed.
    """
    soup = BeautifulSoup(html_snippet or "", "html.parser")
    # A separator is required: with strip=True and the default empty
    # separator, "<p>Hello <b>world</b></p>" would come out as "Helloworld".
    text = soup.get_text(separator=" ", strip=True)
    # Entity-escaped markup (&lt;p&gt;) is decoded into literal tags by the
    # parser above, so strip whatever tag-like text is left.
    text = re.sub(r"<[^<]+?>", "", text)
    # Collapses newlines and repeated spaces and trims both ends.
    return " ".join(text.split())
|
|
|
|
class ScrapedTravelDeal:
|
|
title: str
|
|
summary: str
|
|
url: str
|
|
details: str
|
|
|
|
def __init__(self, entry: Dict[str, str]):
|
|
self.title = entry.get('title', '')
|
|
summary_text = entry.get('summary', entry.get('description', ''))
|
|
self.summary = extract(summary_text)
|
|
self.url = entry.get('link', '')
|
|
self.details = self.summary
|
|
|
|
def __repr__(self):
|
|
return f"<{self.title}>"
|
|
|
|
def describe(self):
|
|
return f"Title: {self.title}\nDetails: {self.details.strip()}\nURL: {self.url}"
|
|
|
|
@classmethod
|
|
def fetch(cls, show_progress: bool = False) -> List[Self]:
|
|
deals = []
|
|
feed_iter = tqdm(feeds) if show_progress else feeds
|
|
for feed_url in feed_iter:
|
|
try:
|
|
feed = feedparser.parse(feed_url)
|
|
for entry in feed.entries[:10]:
|
|
deals.append(cls(entry))
|
|
time.sleep(0.3)
|
|
except Exception as e:
|
|
print(f"Error fetching {feed_url}: {e}")
|
|
return deals
|
|
|
|
# Structured record for one travel deal.
# Documented with comments rather than a docstring: pydantic copies a model's
# docstring into its JSON schema description, which would change the schema.
class TravelDeal(BaseModel):
    # Where the deal applies (free-form string).
    destination: str
    # Category label for the deal; the allowed values are not defined here.
    deal_type: str
    # Human-readable description of the offer.
    description: str
    # Numeric price; currency and unit are not specified in this file.
    price: float
    # Link to the deal.
    url: str
|
|
|
|
# Container model holding a list of TravelDeal records.
# Documented with comments rather than a docstring: pydantic copies a model's
# docstring into its JSON schema description, which would change the schema.
class TravelDealSelection(BaseModel):
    # The selected deals, in the order supplied.
    deals: List[TravelDeal]
|
|
|
|
# Pairs a TravelDeal with two derived figures.
# Documented with comments rather than a docstring: pydantic copies a model's
# docstring into its JSON schema description, which would change the schema.
class TravelOpportunity(BaseModel):
    # The deal being evaluated.
    deal: TravelDeal
    # NOTE(review): presumably an estimated typical price for the deal - confirm.
    estimate: float
    # NOTE(review): presumably the saving of deal.price against estimate - confirm.
    discount: float
|
|
|