Merge pull request #891 from iamumarjaved/WEEK8-DAY5

Week8 day5
This commit is contained in:
Ed Donner
2025-10-29 11:39:30 -04:00
committed by GitHub
17 changed files with 1389 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
import logging
class Agent:
"""
An abstract superclass for Agents
Used to log messages in a way that can identify each Agent
"""
# Foreground colors
RED = '\033[31m'
GREEN = '\033[32m'
YELLOW = '\033[33m'
BLUE = '\033[34m'
MAGENTA = '\033[35m'
CYAN = '\033[36m'
WHITE = '\033[37m'
# Background color
BG_BLACK = '\033[40m'
# Reset code to return to default color
RESET = '\033[0m'
name: str = ""
color: str = '\033[37m'
def log(self, message):
"""
Log this as an info message, identifying the agent
"""
color_code = self.BG_BLACK + self.color
message = f"[{self.name}] {message}"
logging.info(color_code + message + self.RESET)
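A minimal usage sketch, assuming a hypothetical PricingAgent subclass and that the root logger has already been configured at INFO level:

import logging
logging.basicConfig(level=logging.INFO)

class PricingAgent(Agent):
    name = "Pricing Agent"
    color = Agent.GREEN

PricingAgent().log("starting up")  # logs "[Pricing Agent] starting up", rendered green on black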

View File

@@ -0,0 +1,75 @@
import os
import re
import sys
from typing import List, Dict
from openai import OpenAI
from sentence_transformers import SentenceTransformer
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.agent import Agent
class TravelEstimatorAgent(Agent):
name = "Travel Estimator"
color = Agent.BLUE
MODEL = "gpt-4o-mini"
def __init__(self, collection):
self.log("Travel Estimator initializing")
self.client = OpenAI()
self.MODEL = "gpt-4o-mini"
self.log("Travel Estimator using OpenAI")
self.collection = collection
self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
self.log("Travel Estimator ready")
def make_context(self, similars: List[str], prices: List[float]) -> str:
message = "Here are similar travel deals for context:\n\n"
for similar, price in zip(similars, prices):
message += f"Similar deal:\n{similar}\nPrice: ${price:.2f}\n\n"
return message
def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
system_message = "You estimate fair market prices for travel deals. Reply only with the price estimate, no explanation"
user_prompt = self.make_context(similars, prices)
user_prompt += "Now estimate the fair market price for:\n\n"
user_prompt += description
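# Prime the final assistant turn with "Fair price estimate: $" so the model completes with just a number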
return [
{"role": "system", "content": system_message},
{"role": "user", "content": user_prompt},
{"role": "assistant", "content": "Fair price estimate: $"}
]
def find_similars(self, description: str):
self.log("Travel Estimator searching for similar deals")
vector = self.model.encode([description])
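# Nearest-neighbour lookup in Chroma: compare the query embedding against stored deal embeddings and return the 5 closest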
results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
documents = results['documents'][0][:]
prices = [m['price'] for m in results['metadatas'][0][:]]
self.log("Travel Estimator found similar deals")
return documents, prices
def get_price(self, s) -> float:
s = s.replace('$','').replace(',','')
match = re.search(r"[-+]?\d*\.\d+|\d+", s)
return float(match.group()) if match else 0.0
def estimate(self, description: str) -> float:
documents, prices = self.find_similars(description)
self.log(f"Travel Estimator calling {self.MODEL}")
response = self.client.chat.completions.create(
model=self.MODEL,
messages=self.messages_for(description, documents, prices),
seed=42,
max_tokens=10
)
reply = response.choices[0].message.content
result = self.get_price(reply)
self.log(f"Travel Estimator complete - ${result:.2f}")
return result

View File

@@ -0,0 +1,48 @@
import os
import sys
import http.client
import urllib.parse
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.agent import Agent
from helpers.travel_deals import TravelOpportunity
DO_PUSH = True
class TravelMessagingAgent(Agent):
name = "Travel Messenger"
color = Agent.WHITE
def __init__(self):
self.log("Travel Messenger initializing")
if DO_PUSH:
self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-token-if-not-using-env')
self.log("Travel Messenger has initialized Pushover")
def push(self, text):
self.log("Travel Messenger sending push notification")
conn = http.client.HTTPSConnection("api.pushover.net:443")
conn.request("POST", "/1/messages.json",
urllib.parse.urlencode({
"token": self.pushover_token,
"user": self.pushover_user,
"message": text,
"sound": "cashregister"
}), { "Content-type": "application/x-www-form-urlencoded" })
conn.getresponse()
def alert(self, opportunity: TravelOpportunity):
text = f"Travel Deal! {opportunity.deal.destination} - "
text += f"Price=${opportunity.deal.price:.2f}, "
text += f"Est=${opportunity.estimate:.2f}, "
text += f"Save ${opportunity.discount:.2f}! "
text += opportunity.deal.url
if DO_PUSH:
self.push(text)
self.log("Travel Messenger completed")

View File

@@ -0,0 +1,57 @@
import os
import sys
from typing import Optional, List
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.agent import Agent
from helpers.travel_deals import TravelDeal, TravelOpportunity
from agents.travel_scanner_agent import TravelScannerAgent
from agents.travel_estimator_agent import TravelEstimatorAgent
from agents.travel_messaging_agent import TravelMessagingAgent
class TravelPlanningAgent(Agent):
name = "Travel Planner"
color = Agent.GREEN
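# Minimum saving in dollars (estimate minus asking price) a deal must exceed before it is alerted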
DEAL_THRESHOLD = 50
def __init__(self, collection):
self.log("Travel Planner initializing")
self.scanner = TravelScannerAgent()
self.estimator = TravelEstimatorAgent(collection)
self.messenger = TravelMessagingAgent()
self.log("Travel Planner ready")
def evaluate(self, deal: TravelDeal) -> TravelOpportunity:
self.log(f"Travel Planner evaluating {deal.destination}")
estimate = self.estimator.estimate(deal.description)
discount = estimate - deal.price
self.log(f"Travel Planner found discount ${discount:.2f}")
return TravelOpportunity(deal=deal, estimate=estimate, discount=discount)
def plan(self, memory: List[TravelOpportunity] = []) -> Optional[List[TravelOpportunity]]:
self.log("Travel Planner starting run")
selection = self.scanner.scan(memory=memory)
if selection and selection.deals:
opportunities = [self.evaluate(deal) for deal in selection.deals[:5]]
if not opportunities:
self.log("Travel Planner found no valid opportunities")
return None
opportunities.sort(key=lambda opp: opp.discount, reverse=True)
good_deals = [opp for opp in opportunities if opp.discount > self.DEAL_THRESHOLD]
if good_deals:
best = good_deals[0]
self.log(f"Travel Planner found {len(good_deals)} deals above threshold, best: ${best.discount:.2f} off")
self.messenger.alert(best)
self.log("Travel Planner completed")
return good_deals
else:
self.log(f"Travel Planner completed - no deals above ${self.DEAL_THRESHOLD} threshold")
return None
self.log("Travel Planner found no deals to evaluate")
return None

View File

@@ -0,0 +1,87 @@
import os
import sys
from typing import Optional, List
from openai import OpenAI
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.agent import Agent
from helpers.travel_deals import ScrapedTravelDeal, TravelDealSelection, TravelOpportunity
class TravelScannerAgent(Agent):
MODEL = "gpt-4o-mini"
SYSTEM_PROMPT = """You identify and summarize the 5 most promising travel deals from a list.
Focus on deals with destinations, deal types (flight/hotel/package), and detailed descriptions.
If price is mentioned, extract it. If no specific price is given but there's a discount mentioned (e.g. "30% off"), estimate a reasonable price.
If absolutely no pricing information exists, use a placeholder price of 500.
Respond strictly in JSON with no explanation.
{"deals": [
{
"destination": "City or Country name",
"deal_type": "Flight, Hotel, or Package",
"description": "4-5 sentences describing the travel deal, dates, what's included, and key highlights",
"price": 499.99,
"url": "the url as provided"
},
...
]}"""
USER_PROMPT_PREFIX = """Respond with the 5 most promising travel deals with destinations, types, and descriptions.
Respond strictly in JSON. Provide detailed descriptions focusing on what travelers get.
Extract the destination and deal type (Flight/Hotel/Package) from the title and description.
For pricing: extract exact prices if available, estimate from percentage discounts, or use 500 as placeholder.
Travel Deals:
"""
USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON with exactly 5 deals."
name = "Travel Scanner"
color = Agent.CYAN
def __init__(self):
self.log("Travel Scanner is initializing")
self.openai = OpenAI()
self.log("Travel Scanner is ready")
def fetch_deals(self, memory: List[TravelOpportunity]) -> List[ScrapedTravelDeal]:
self.log("Travel Scanner fetching deals from RSS feeds")
urls = [opp.deal.url for opp in memory]
scraped = ScrapedTravelDeal.fetch()
result = [scrape for scrape in scraped if scrape.url not in urls]
self.log(f"Travel Scanner found {len(result)} new deals")
return result
def make_user_prompt(self, scraped) -> str:
user_prompt = self.USER_PROMPT_PREFIX
user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
user_prompt += self.USER_PROMPT_SUFFIX
return user_prompt
def scan(self, memory: List[TravelOpportunity] = []) -> Optional[TravelDealSelection]:
scraped = self.fetch_deals(memory)
if scraped:
user_prompt = self.make_user_prompt(scraped)
self.log("Travel Scanner calling OpenAI")
result = self.openai.beta.chat.completions.parse(
model=self.MODEL,
messages=[
{"role": "system", "content": self.SYSTEM_PROMPT},
{"role": "user", "content": user_prompt}
],
response_format=TravelDealSelection
)
result = result.choices[0].message.parsed
valid_deals = [deal for deal in result.deals if deal.price > 0]
result.deals = valid_deals
self.log(f"Travel Scanner received {len(result.deals)} valid deals")
return result if result.deals else None
return None

View File

@@ -0,0 +1,73 @@
import os
import sys
import numpy as np
import joblib
from sentence_transformers import SentenceTransformer
import xgboost as xgb
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.agent import Agent
class TravelXGBoostAgent(Agent):
name = "XGBoost Estimator"
color = Agent.GREEN
def __init__(self, collection):
self.log("XGBoost Estimator initializing")
self.collection = collection
self.model_path = os.path.join(w8d5_path, 'helpers', 'travel_xgboost_model.pkl')
self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
if os.path.exists(self.model_path):
self.log("Loading existing XGBoost model")
self.model = joblib.load(self.model_path)
else:
self.log("Training new XGBoost model")
self.model = self._train_model()
joblib.dump(self.model, self.model_path)
self.log(f"XGBoost model saved to {self.model_path}")
self.log("XGBoost Estimator ready")
def _train_model(self):
self.log("Fetching training data from ChromaDB")
result = self.collection.get(include=['embeddings', 'metadatas'])
X = np.array(result['embeddings'])
y = np.array([m['price'] for m in result['metadatas']])
self.log(f"Training on {len(X)} samples")
model = xgb.XGBRegressor(
n_estimators=100,
max_depth=6,
learning_rate=0.1,
subsample=0.8,
colsample_bytree=0.8,
random_state=42,
n_jobs=-1
)
model.fit(X, y)
self.log("XGBoost training complete")
return model
def estimate(self, description: str) -> float:
self.log(f"XGBoost estimating price for: {description[:50]}...")
embedding = self.embedder.encode([description])[0]
embedding_2d = embedding.reshape(1, -1)
prediction = self.model.predict(embedding_2d)[0]
prediction = max(0, prediction)
self.log(f"XGBoost estimate: ${prediction:.2f}")
return float(prediction)

View File

@@ -0,0 +1,230 @@
import os
import random
from dotenv import load_dotenv
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
import chromadb
from tqdm import tqdm
load_dotenv(override=True)
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)
DB = "travel_vectorstore"
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']
AIRLINES = ['American Airlines', 'Delta', 'United', 'Southwest', 'JetBlue', 'Spirit', 'Frontier', 'Alaska Airlines', 'Emirates', 'British Airways', 'Air France', 'Lufthansa', 'Qatar Airways']
CITIES = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Miami', 'San Francisco', 'Boston', 'Seattle', 'Denver', 'Atlanta', 'Las Vegas', 'Orlando', 'Phoenix', 'London', 'Paris', 'Tokyo', 'Dubai', 'Singapore', 'Sydney', 'Rome']
HOTELS = ['Hilton', 'Marriott', 'Hyatt', 'Holiday Inn', 'Best Western', 'Sheraton', 'Ritz-Carlton', 'Four Seasons', 'Westin', 'Radisson']
CLASSES = ['Economy', 'Premium Economy', 'Business', 'First Class']
CAR_COMPANIES = ['Hertz', 'Enterprise', 'Avis', 'Budget', 'National', 'Alamo']
CAR_TYPES = ['Compact', 'Sedan', 'SUV', 'Luxury', 'Van']
def generate_flight_description():
airline = random.choice(AIRLINES)
source = random.choice(CITIES)
dest = random.choice([c for c in CITIES if c != source])
flight_class = random.choice(CLASSES)
stops = random.choice(['non-stop', 'one-stop', 'two-stops'])
duration = f"{random.randint(1, 15)} hours {random.randint(0, 59)} minutes"
description = f"{airline} {flight_class} {stops} flight from {source} to {dest}. "
description += f"Flight duration approximately {duration}. "
if random.random() > 0.5:
description += f"Includes {random.randint(1, 2)} checked bag"
if random.random() > 0.5:
description += "s"
description += ". "
if flight_class in ['Business', 'First Class']:
description += random.choice(['Priority boarding included. ', 'Lounge access available. ', 'Lie-flat seats. '])
price = random.randint(150, 2500) if flight_class == 'Economy' else random.randint(800, 8000)
return description, price
def generate_hotel_description():
hotel = random.choice(HOTELS)
city = random.choice(CITIES)
stars = random.randint(2, 5)
room_type = random.choice(['Standard Room', 'Deluxe Room', 'Suite', 'Executive Suite'])
nights = random.randint(1, 7)
description = f"{hotel} {stars}-star hotel in {city}. {room_type} for {nights} night"
if nights > 1:
description += "s"
description += ". "
amenities = []
if random.random() > 0.3:
amenities.append('Free WiFi')
if random.random() > 0.5:
amenities.append('Breakfast included')
if random.random() > 0.6:
amenities.append('Pool access')
if random.random() > 0.7:
amenities.append('Gym')
if random.random() > 0.8:
amenities.append('Spa services')
if amenities:
description += f"Amenities: {', '.join(amenities)}. "
price_per_night = random.randint(80, 500) if stars <= 3 else random.randint(200, 1200)
total_price = price_per_night * nights
return description, total_price
def generate_car_rental_description():
company = random.choice(CAR_COMPANIES)
car_type = random.choice(CAR_TYPES)
city = random.choice(CITIES)
days = random.randint(1, 14)
description = f"{company} car rental in {city}. {car_type} class vehicle for {days} day"
if days > 1:
description += "s"
description += ". "
if random.random() > 0.6:
description += "Unlimited mileage included. "
if random.random() > 0.5:
description += "Airport pickup available. "
if random.random() > 0.7:
description += "GPS navigation included. "
daily_rate = {'Compact': random.randint(25, 45), 'Sedan': random.randint(35, 65), 'SUV': random.randint(50, 90), 'Luxury': random.randint(80, 200), 'Van': random.randint(60, 100)}
total_price = daily_rate[car_type] * days
return description, total_price
def generate_vacation_package_description():
city = random.choice(CITIES)
nights = random.randint(3, 10)
description = f"All-inclusive vacation package to {city} for {nights} nights. "
description += f"Includes round-trip {random.choice(CLASSES)} flights, {random.choice(HOTELS)} hotel accommodation, "
extras = []
if random.random() > 0.3:
extras.append('daily breakfast')
if random.random() > 0.5:
extras.append('airport transfers')
if random.random() > 0.6:
extras.append('city tour')
if random.random() > 0.7:
extras.append('travel insurance')
if extras:
description += f"and {', '.join(extras)}. "
base_price = random.randint(800, 4000)
return description, base_price
def generate_cruise_description():
destinations = ', '.join(random.sample(['Caribbean', 'Mediterranean', 'Alaska', 'Hawaii', 'Baltic Sea', 'South Pacific'], k=random.randint(2, 4)))
nights = random.choice([3, 5, 7, 10, 14])
description = f"{nights}-night cruise visiting {destinations}. "
description += "All meals and entertainment included. "
cabin_type = random.choice(['Interior cabin', 'Ocean view cabin', 'Balcony cabin', 'Suite'])
description += f"{cabin_type}. "
if random.random() > 0.5:
description += "Unlimited beverage package available. "
if random.random() > 0.6:
description += "Shore excursions at each port. "
base_price = random.randint(500, 5000)
return description, base_price
def generate_activity_description():
city = random.choice(CITIES)
activities = ['City sightseeing tour', 'Museum pass', 'Adventure sports package', 'Wine tasting tour', 'Cooking class', 'Hot air balloon ride', 'Snorkeling excursion', 'Helicopter tour', 'Spa day package', 'Theme park tickets']
activity = random.choice(activities)
description = f"{activity} in {city}. "
if 'tour' in activity.lower():
description += f"Duration: {random.randint(2, 8)} hours. "
if random.random() > 0.5:
description += "Hotel pickup included. "
if random.random() > 0.6:
description += "Small group experience. "
price = random.randint(30, 500)
return description, price
GENERATORS = {
'Flights': generate_flight_description,
'Hotels': generate_hotel_description,
'Car_Rentals': generate_car_rental_description,
'Vacation_Packages': generate_vacation_package_description,
'Cruises': generate_cruise_description,
'Activities': generate_activity_description
}
print("Generating synthetic travel dataset...")
travel_data = []
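# 6 categories x 3334 items each gives roughly 20,000 synthetic (description, price, category) records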
items_per_category = 3334
for category in CATEGORIES:
print(f"Generating {category}...")
generator = GENERATORS[category]
for _ in range(items_per_category):
description, price = generator()
travel_data.append((description, float(price), category))
random.shuffle(travel_data)
print(f"Generated {len(travel_data)} travel deals")
print("\nInitializing SentenceTransformer model...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print(f"Connecting to ChromaDB at {DB}...")
client = chromadb.PersistentClient(path=DB)
collection_name = "travel_deals"
existing_collections = [col.name for col in client.list_collections()]
if collection_name in existing_collections:
client.delete_collection(collection_name)
print(f"Deleted existing collection: {collection_name}")
collection = client.create_collection(collection_name)
print(f"Created new collection: {collection_name}")
print("\nCreating embeddings and adding to ChromaDB...")
for i in tqdm(range(0, len(travel_data), 1000)):
batch = travel_data[i:i+1000]
documents = [desc for desc, _, _ in batch]
vectors = model.encode(documents).astype(float).tolist()
metadatas = [{"category": cat, "price": price} for _, price, cat in batch]
ids = [f"travel_{j}" for j in range(i, i+len(batch))]
collection.add(
ids=ids,
documents=documents,
embeddings=vectors,
metadatas=metadatas
)
total_items = collection.count()
print(f"\nVectorstore created successfully with {total_items} travel deals")
result = collection.get(include=['metadatas'], limit=total_items)
categories = [m['category'] for m in result['metadatas']]
prices = [m['price'] for m in result['metadatas']]
category_counts = {}
for cat in categories:
category_counts[cat] = category_counts.get(cat, 0) + 1
print("\nCategory distribution:")
for category, count in sorted(category_counts.items()):
print(f" {category}: {count}")
avg_price = sum(prices) / len(prices) if prices else 0
print(f"\nAverage price: ${avg_price:.2f}")
print(f"Price range: ${min(prices):.2f} - ${max(prices):.2f}")

View File

@@ -0,0 +1,99 @@
import os
import sys
import logging
import json
from typing import List, Optional
from dotenv import load_dotenv
import chromadb
import numpy as np
from sklearn.manifold import TSNE
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.travel_planning_agent import TravelPlanningAgent
from helpers.travel_deals import TravelOpportunity
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']
COLORS = ['red', 'blue', 'green', 'orange', 'purple', 'cyan']
def init_logging():
root = logging.getLogger()
root.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter(
"[%(asctime)s] [Travel Agents] [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S %z",
)
handler.setFormatter(formatter)
root.addHandler(handler)
class TravelDealFramework:
DB = "travel_vectorstore"
MEMORY_FILENAME = "travel_memory.json"
def __init__(self):
init_logging()
load_dotenv()
client = chromadb.PersistentClient(path=self.DB)
self.memory = self.read_memory()
self.collection = client.get_or_create_collection('travel_deals')
self.planner = None
def init_agents_as_needed(self):
if not self.planner:
self.log("Initializing Travel Agent Framework")
self.planner = TravelPlanningAgent(self.collection)
self.log("Travel Agent Framework ready")
def read_memory(self) -> List[TravelOpportunity]:
if os.path.exists(self.MEMORY_FILENAME):
with open(self.MEMORY_FILENAME, "r") as file:
data = json.load(file)
opportunities = [TravelOpportunity(**item) for item in data]
return opportunities
return []
def write_memory(self) -> None:
data = [opportunity.dict() for opportunity in self.memory]
with open(self.MEMORY_FILENAME, "w") as file:
json.dump(data, file, indent=2)
def log(self, message: str):
text = BG_BLUE + WHITE + "[Travel Framework] " + message + RESET
logging.info(text)
def run(self) -> List[TravelOpportunity]:
self.init_agents_as_needed()
logging.info("Starting Travel Planning Agent")
results = self.planner.plan(memory=self.memory)
logging.info(f"Travel Planning Agent completed with {len(results) if results else 0} results")
if results:
self.memory.extend(results)
self.write_memory()
return self.memory
@classmethod
def get_plot_data(cls, max_datapoints=10000):
client = chromadb.PersistentClient(path=cls.DB)
collection = client.get_or_create_collection('travel_deals')
result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
vectors = np.array(result['embeddings'])
documents = result['documents']
categories = [metadata['category'] for metadata in result['metadatas']]
colors = [COLORS[CATEGORIES.index(c)] for c in categories]
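# t-SNE reduces the 384-dimensional embeddings to 3 components for the Plotly scatter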
tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
reduced_vectors = tsne.fit_transform(vectors)
return documents, reduced_vectors, colors
if __name__=="__main__":
TravelDealFramework().run()

View File

@@ -0,0 +1,67 @@
from pydantic import BaseModel
from typing import List, Dict, Self
from bs4 import BeautifulSoup
import re
import feedparser
from tqdm import tqdm
import requests
import time
feeds = [
"https://thepointsguy.com/feed/",
]
def extract(html_snippet: str) -> str:
soup = BeautifulSoup(html_snippet, 'html.parser')
text = soup.get_text(strip=True)
text = re.sub('<[^<]+?>', '', text)
return text.replace('\n', ' ').strip()
class ScrapedTravelDeal:
title: str
summary: str
url: str
details: str
def __init__(self, entry: Dict[str, str]):
self.title = entry.get('title', '')
summary_text = entry.get('summary', entry.get('description', ''))
self.summary = extract(summary_text)
self.url = entry.get('link', '')
self.details = self.summary
def __repr__(self):
return f"<{self.title}>"
def describe(self):
return f"Title: {self.title}\nDetails: {self.details.strip()}\nURL: {self.url}"
@classmethod
def fetch(cls, show_progress: bool = False) -> List[Self]:
deals = []
feed_iter = tqdm(feeds) if show_progress else feeds
for feed_url in feed_iter:
try:
feed = feedparser.parse(feed_url)
for entry in feed.entries[:10]:
deals.append(cls(entry))
time.sleep(0.3)
except Exception as e:
print(f"Error fetching {feed_url}: {e}")
return deals
class TravelDeal(BaseModel):
destination: str
deal_type: str
description: str
price: float
url: str
class TravelDealSelection(BaseModel):
deals: List[TravelDeal]
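# A scanned deal paired with an estimated fair price and the implied discount (estimate minus price)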
class TravelOpportunity(BaseModel):
deal: TravelDeal
estimate: float
discount: float

View File

@@ -0,0 +1,161 @@
import os
import sys
import logging
import json
from typing import List, Tuple
from dotenv import load_dotenv
import chromadb
import numpy as np
from sklearn.manifold import TSNE
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
from agents.travel_scanner_agent import TravelScannerAgent
from agents.travel_estimator_agent import TravelEstimatorAgent
from agents.travel_xgboost_agent import TravelXGBoostAgent
from agents.travel_messaging_agent import TravelMessagingAgent
from helpers.travel_deals import TravelOpportunity, TravelDeal
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']
COLORS = ['red', 'blue', 'green', 'orange', 'purple', 'cyan']
def init_logging():
root = logging.getLogger()
root.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter(
"[%(asctime)s] [Travel Agents] [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S %z",
)
handler.setFormatter(formatter)
root.addHandler(handler)
class TravelDualFramework:
DB = "travel_vectorstore"
LLM_MEMORY_FILE = "travel_memory_llm.json"
XGB_MEMORY_FILE = "travel_memory_xgb.json"
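# Dollar saving an estimate must imply before either estimator's deal is alerted and remembered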
DEAL_THRESHOLD = 200.0
def __init__(self):
init_logging()
load_dotenv()
client = chromadb.PersistentClient(path=self.DB)
self.collection = client.get_or_create_collection('travel_deals')
self.llm_memory = self.read_memory(self.LLM_MEMORY_FILE)
self.xgb_memory = self.read_memory(self.XGB_MEMORY_FILE)
self.scanner = None
self.llm_estimator = None
self.xgb_estimator = None
self.messenger = None
def init_agents_as_needed(self):
if not self.scanner:
self.log("Initializing Travel Dual Estimation Framework")
self.scanner = TravelScannerAgent()
self.llm_estimator = TravelEstimatorAgent(self.collection)
self.xgb_estimator = TravelXGBoostAgent(self.collection)
self.messenger = TravelMessagingAgent()
self.log("Travel Dual Framework ready")
def read_memory(self, filename: str) -> List[TravelOpportunity]:
if os.path.exists(filename):
with open(filename, "r") as file:
data = json.load(file)
opportunities = [TravelOpportunity(**item) for item in data]
return opportunities
return []
def write_memory(self, opportunities: List[TravelOpportunity], filename: str) -> None:
data = [opportunity.dict() for opportunity in opportunities]
with open(filename, "w") as file:
json.dump(data, file, indent=2)
def log(self, message: str):
text = BG_BLUE + WHITE + "[Dual Framework] " + message + RESET
logging.info(text)
def run(self) -> Tuple[List[TravelOpportunity], List[TravelOpportunity]]:
self.init_agents_as_needed()
self.log("Starting dual estimation scan")
deal_selection = self.scanner.scan()
if not deal_selection or not deal_selection.deals:
self.log("No deals found")
return self.llm_memory, self.xgb_memory
deals = deal_selection.deals
self.log(f"Processing {len(deals)} deals with both estimators")
llm_opportunities = []
xgb_opportunities = []
for deal in deals:
llm_estimate = self.llm_estimator.estimate(deal.description)
llm_discount = llm_estimate - deal.price
if llm_discount >= self.DEAL_THRESHOLD:
llm_opp = TravelOpportunity(
deal=deal,
estimate=llm_estimate,
discount=llm_discount
)
llm_opportunities.append(llm_opp)
self.log(f"LLM found opportunity: {deal.destination} - ${llm_discount:.0f} savings")
self.messenger.alert(llm_opp)
xgb_estimate = self.xgb_estimator.estimate(deal.description)
xgb_discount = xgb_estimate - deal.price
if xgb_discount >= self.DEAL_THRESHOLD:
xgb_opp = TravelOpportunity(
deal=deal,
estimate=xgb_estimate,
discount=xgb_discount
)
xgb_opportunities.append(xgb_opp)
self.log(f"XGBoost found opportunity: {deal.destination} - ${xgb_discount:.0f} savings")
self.messenger.alert(xgb_opp)
if llm_opportunities:
self.llm_memory.extend(llm_opportunities)
self.write_memory(self.llm_memory, self.LLM_MEMORY_FILE)
if xgb_opportunities:
self.xgb_memory.extend(xgb_opportunities)
self.write_memory(self.xgb_memory, self.XGB_MEMORY_FILE)
self.log(f"Scan complete: {len(llm_opportunities)} LLM, {len(xgb_opportunities)} XGBoost opportunities")
return self.llm_memory, self.xgb_memory
@classmethod
def get_plot_data(cls, max_datapoints=10000):
client = chromadb.PersistentClient(path=cls.DB)
collection = client.get_or_create_collection('travel_deals')
result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
vectors = np.array(result['embeddings'])
documents = result['documents']
categories = [metadata['category'] for metadata in result['metadatas']]
colors = [COLORS[CATEGORIES.index(c)] for c in categories]
tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
reduced_vectors = tsne.fit_transform(vectors)
return documents, reduced_vectors, colors, categories
if __name__=="__main__":
framework = TravelDualFramework()
framework.run()

View File

@@ -0,0 +1,66 @@
import os
import sys
from dotenv import load_dotenv
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))
from helpers.travel_deals import ScrapedTravelDeal
from agents.travel_scanner_agent import TravelScannerAgent
from agents.travel_estimator_agent import TravelEstimatorAgent
load_dotenv()
print("\nTesting Travel Deal Hunter Components\n")
print("1. RSS Feed Scraping")
deals = ScrapedTravelDeal.fetch(show_progress=False)
print(f"Fetched {len(deals)} deals from RSS feeds")
if deals:
print(f"Sample: {deals[0].title[:60]}...")
print("\n2. OpenAI Connection")
if os.getenv("OPENAI_API_KEY"):
print("OPENAI_API_KEY found")
else:
print("OPENAI_API_KEY not found - set in .env file")
print("\n3. Scanner Agent")
scanner = TravelScannerAgent()
print("Scanner agent initialized")
print("\n4. Deal Scanning")
try:
selection = scanner.scan(memory=[])
if selection and selection.deals:
print(f"Scanner found {len(selection.deals)} processed deals")
print(f"Sample: {selection.deals[0].destination} - ${selection.deals[0].price}")
else:
print("No deals returned")
except Exception as e:
print(f"Error: {e}")
print("\n5. ChromaDB Access")
import chromadb
try:
db_path = "travel_vectorstore"
client = chromadb.PersistentClient(path=db_path)
collection = client.get_or_create_collection('travel_deals')
count = collection.count()
print(f"ChromaDB connected - {count} travel items in collection")
except Exception as e:
print(f"Error: {e}")
print("\n6. Estimator Check using travel vectorstore")
try:
estimator = TravelEstimatorAgent(collection)
sample = "Non-stop economy flight from New York to London, duration 7 hours"
estimate = estimator.estimate(sample)
print(f"Estimate: ${estimate:.2f}")
except Exception as e:
print(f"Error: {e}")
print("\nComponent tests complete")

View File

@@ -0,0 +1,49 @@
import os
import sys
from dotenv import load_dotenv
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))
from agents.travel_estimator_agent import TravelEstimatorAgent
from agents.travel_xgboost_agent import TravelXGBoostAgent
import chromadb
load_dotenv()
print("\nTesting Dual Estimation (LLM vs XGBoost)\n")
client = chromadb.PersistentClient(path='travel_vectorstore')
collection = client.get_collection('travel_deals')
print("Initializing agents...")
llm_agent = TravelEstimatorAgent(collection)
xgb_agent = TravelXGBoostAgent(collection)
test_cases = [
"Round trip flight from New York to London, Economy class, non-stop",
"5-star Marriott hotel in Paris, 3 nights, Suite with breakfast included",
"7-night Caribbean cruise, Balcony cabin, all meals included",
"Hertz SUV rental in Los Angeles for 5 days with unlimited mileage",
"All-inclusive vacation package to Dubai for 7 nights with Business class flights"
]
print("\n" + "="*80)
print(f"{'Travel Deal Description':<60} {'LLM Est.':<12} {'XGB Est.':<12}")
print("="*80)
for desc in test_cases:
llm_est = llm_agent.estimate(desc)
xgb_est = xgb_agent.estimate(desc)
short_desc = desc[:57] + "..." if len(desc) > 60 else desc
print(f"{short_desc:<60} ${llm_est:>9.2f} ${xgb_est:>9.2f}")
print("="*80)
print("\nDual estimation test complete!")
print("\nKey Observations:")
print("- LLM: Uses semantic understanding + RAG context")
print("- XGBoost: Uses pattern recognition from embeddings")
print("- Both trained on same 20K travel deals dataset")

View File

@@ -0,0 +1,38 @@
import os
import sys
from dotenv import load_dotenv
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))
from helpers.travel_deal_framework import TravelDealFramework
load_dotenv()
print("\nTesting Full Travel Deal Pipeline\n")
print("Initializing framework...")
framework = TravelDealFramework()
framework.init_agents_as_needed()
print("\nRunning one iteration...")
try:
result = framework.run()
print(f"\nPipeline completed")
print(f"Memory now has {len(result)} opportunities")
if result:
latest = result[-1]
print(f"\nLatest opportunity:")
print(f" Destination: {latest.deal.destination}")
print(f" Type: {latest.deal.deal_type}")
print(f" Price: ${latest.deal.price:.2f}")
print(f" Estimate: ${latest.estimate:.2f}")
print(f" Discount: ${latest.discount:.2f}")
except Exception as e:
print(f"\nError during pipeline: {e}")
import traceback
traceback.print_exc()
print("\n")

View File

@@ -0,0 +1,306 @@
import os
import sys
import logging
import queue
import threading
import time
import gradio as gr
import plotly.graph_objects as go
w8d5_path = os.path.abspath(os.path.dirname(__file__))
week8_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
if w8d5_path not in sys.path:
sys.path.insert(0, w8d5_path)
if week8_path not in sys.path:
sys.path.insert(0, week8_path)
from log_utils import reformat
from helpers.travel_dual_framework import TravelDualFramework
from helpers.travel_deals import TravelOpportunity, TravelDeal
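# Route log records onto a thread-safe queue so the Gradio UI can render them incrementally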
class QueueHandler(logging.Handler):
def __init__(self, log_queue):
super().__init__()
self.log_queue = log_queue
def emit(self, record):
self.log_queue.put(self.format(record))
log_queue = queue.Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setFormatter(
logging.Formatter(
"[%(asctime)s] [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
)
logging.getLogger().addHandler(queue_handler)
logging.getLogger().setLevel(logging.INFO)
agent_framework = TravelDualFramework()
agent_framework.init_agents_as_needed()
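# Seconds between automatic background re-scans (5 minutes)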
CHECK_INTERVAL = 300
def run_agent_framework():
while True:
try:
agent_framework.run()
except Exception as e:
logging.error(f"Error in agent framework: {e}")
time.sleep(CHECK_INTERVAL)
framework_thread = threading.Thread(target=run_agent_framework, daemon=True)
framework_thread.start()
def get_llm_table(llm_opps):
return [[
opp.deal.destination,
opp.deal.deal_type,
f"${opp.deal.price:.2f}",
f"${opp.estimate:.2f}",
f"${opp.discount:.2f}",
opp.deal.url[:50] + "..." if len(opp.deal.url) > 50 else opp.deal.url
] for opp in llm_opps]
def get_xgb_table(xgb_opps):
return [[
opp.deal.destination,
opp.deal.deal_type,
f"${opp.deal.price:.2f}",
f"${opp.estimate:.2f}",
f"${opp.discount:.2f}",
opp.deal.url[:50] + "..." if len(opp.deal.url) > 50 else opp.deal.url
] for opp in xgb_opps]
log_data = []
def update_ui():
global log_data
llm_data = get_llm_table(agent_framework.llm_memory)
xgb_data = get_xgb_table(agent_framework.xgb_memory)
while not log_queue.empty():
try:
message = log_queue.get_nowait()
log_data.append(reformat(message))
except queue.Empty:
break
logs_html = '<div style="height: 500px; overflow-y: auto; border: 1px solid #ccc; background-color: #1a1a1a; padding: 10px; font-family: monospace; font-size: 12px; color: #fff;">'
logs_html += '<br>'.join(log_data[-50:])
logs_html += '</div>'
llm_count = len(agent_framework.llm_memory)
xgb_count = len(agent_framework.xgb_memory)
stats = f"LLM Opportunities: {llm_count} | XGBoost Opportunities: {xgb_count}"
return llm_data, xgb_data, logs_html, stats
def create_3d_plot():
try:
documents, vectors, colors, categories = TravelDualFramework.get_plot_data(max_datapoints=5000)
if len(vectors) == 0:
fig = go.Figure()
fig.add_annotation(
text="No data available yet. Vectorstore will load after initialization.",
xref="paper", yref="paper",
x=0.5, y=0.5, showarrow=False,
font=dict(size=16)
)
return fig
fig = go.Figure()
unique_categories = list(set(categories))
category_colors = {cat: colors[categories.index(cat)] for cat in unique_categories}
for category in unique_categories:
mask = [cat == category for cat in categories]
cat_vectors = vectors[mask]
fig.add_trace(go.Scatter3d(
x=cat_vectors[:, 0],
y=cat_vectors[:, 1],
z=cat_vectors[:, 2],
mode='markers',
marker=dict(
size=3,
color=category_colors[category],
opacity=0.6
),
name=category.replace('_', ' '),
hovertemplate='<b>%{text}</b><extra></extra>',
text=[category] * len(cat_vectors)
))
fig.update_layout(
title={
'text': f'3D Travel Vectorstore Visualization ({len(vectors):,} deals)',
'x': 0.5,
'xanchor': 'center'
},
scene=dict(
xaxis_title='Dimension 1',
yaxis_title='Dimension 2',
zaxis_title='Dimension 3',
camera=dict(
eye=dict(x=1.5, y=1.5, z=1.5)
)
),
width=1200,
height=600,
margin=dict(r=0, b=0, l=0, t=40),
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="left",
x=0.01
)
)
return fig
except Exception as e:
logging.error(f"Error creating 3D plot: {e}")
fig = go.Figure()
fig.add_annotation(
text=f"Error loading plot: {str(e)}",
xref="paper", yref="paper",
x=0.5, y=0.5, showarrow=False,
font=dict(size=14, color="red")
)
return fig
with gr.Blocks(title="Travel Deal Hunter - Dual Estimation", fill_width=True, theme=gr.themes.Soft()) as ui:
gr.Markdown(
"""
<div style="text-align: center;">
<h1 style="margin-bottom: 10px;">Travel Deal Hunter - Dual Estimation System</h1>
<p style="color: #666; font-size: 16px;">
Comparing LLM-based Semantic Estimation vs XGBoost Machine Learning
</p>
<p style="color: #999; font-size: 14px; margin-top: 10px;">
System scans RSS feeds every 5 minutes. Use the button below to trigger a manual scan.
</p>
</div>
"""
)
with gr.Row():
with gr.Column(scale=3):
stats_display = gr.Textbox(
label="",
value="LLM Opportunities: 0 | XGBoost Opportunities: 0",
interactive=False,
show_label=False,
container=False
)
with gr.Column(scale=1):
scan_button = gr.Button("Scan Now", variant="primary")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### LLM Estimates")
llm_dataframe = gr.Dataframe(
headers=["Destination", "Type", "Price", "LLM Est.", "Savings", "URL"],
datatype=["str", "str", "str", "str", "str", "str"],
wrap=True,
column_widths=[2, 1, 1, 1, 1, 2],
row_count=5,
col_count=6,
interactive=False
)
with gr.Column(scale=1):
gr.Markdown("### XGBoost Estimates")
xgb_dataframe = gr.Dataframe(
headers=["Destination", "Type", "Price", "XGB Est.", "Savings", "URL"],
datatype=["str", "str", "str", "str", "str", "str"],
wrap=True,
column_widths=[2, 1, 1, 1, 1, 2],
row_count=5,
col_count=6,
interactive=False
)
with gr.Row():
with gr.Column(scale=2):
plot_output = gr.Plot(label="3D Travel Vectorstore Visualization")
with gr.Column(scale=1):
gr.Markdown("### Agent Activity Logs")
log_output = gr.HTML(
value='<div style="height: 500px; overflow-y: auto; border: 1px solid #ccc; background-color: #1a1a1a; padding: 10px; font-family: monospace; font-size: 12px; color: #fff;"></div>'
)
ui.load(
fn=lambda: (
get_llm_table(agent_framework.llm_memory),
get_xgb_table(agent_framework.xgb_memory),
"",
f"LLM Opportunities: {len(agent_framework.llm_memory)} | XGBoost Opportunities: {len(agent_framework.xgb_memory)}",
create_3d_plot()
),
outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display, plot_output]
)
# Manual scan button
def manual_scan():
try:
agent_framework.run()
return update_ui()
except Exception as e:
logging.error(f"Manual scan error: {e}")
return update_ui()
scan_button.click(
fn=manual_scan,
outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display]
)
# Click handlers for notifications
def llm_click_handler(selected_index: gr.SelectData):
try:
row = selected_index.index[0]
if row < len(agent_framework.llm_memory):
opportunity = agent_framework.llm_memory[row]
agent_framework.messenger.alert(opportunity)
logging.info(f"Manual alert sent for LLM opportunity: {opportunity.deal.destination}")
except Exception as e:
logging.error(f"Error sending LLM notification: {e}")
def xgb_click_handler(selected_index: gr.SelectData):
try:
row = selected_index.index[0]
if row < len(agent_framework.xgb_memory):
opportunity = agent_framework.xgb_memory[row]
agent_framework.messenger.alert(opportunity)
logging.info(f"Manual alert sent for XGBoost opportunity: {opportunity.deal.destination}")
except Exception as e:
logging.error(f"Error sending XGBoost notification: {e}")
llm_dataframe.select(fn=llm_click_handler)
xgb_dataframe.select(fn=xgb_click_handler)
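# Poll every 5 seconds to refresh tables, logs and stats from the background scanning thread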
gr.Timer(5).tick(
fn=update_ui,
outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display]
)
if __name__ == "__main__":
ui.launch(inbrowser=True, share=False)