33
week8/community_contributions/w8d5/agents/agent.py
Normal file
33
week8/community_contributions/w8d5/agents/agent.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import logging
|
||||
|
||||
class Agent:
    """
    Abstract superclass for all agents.

    Provides ANSI color constants and a `log` helper so each agent's
    log output is visually identifiable in the shared log stream.
    """

    # ANSI foreground color escape codes
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # ANSI background color escape code
    BG_BLACK = '\033[40m'

    # Reset code returning the terminal to its default colors
    RESET = '\033[0m'

    # Subclasses override these to identify themselves in log output
    name: str = ""
    color: str = '\033[37m'

    def log(self, message):
        """Emit *message* at INFO level, tagged and colored for this agent."""
        logging.info(f"{self.BG_BLACK}{self.color}[{self.name}] {message}{self.RESET}")
|
||||
@@ -0,0 +1,75 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import List, Dict
|
||||
from openai import OpenAI
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
class TravelEstimatorAgent(Agent):
    """
    Agent that estimates a fair market price for a travel deal.

    Finds the 5 most similar stored deals via sentence-transformer
    embeddings in a Chroma collection, then asks an OpenAI model to
    estimate a price with those deals as context (RAG).
    """

    name = "Travel Estimator"
    color = Agent.BLUE

    # OpenAI model used for estimation. Single source of truth: the
    # redundant per-instance re-assignment in __init__ was removed.
    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        :param collection: Chroma collection whose documents are deal
            descriptions with {'price': float} metadata.
        """
        self.log("Travel Estimator initializing")
        self.client = OpenAI()
        self.log("Travel Estimator using OpenAI")
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.log("Travel Estimator ready")

    def make_context(self, similars: List[str], prices: List[float]) -> str:
        """Format the similar deals and their prices into a context message."""
        message = "Here are similar travel deals for context:\n\n"
        for similar, price in zip(similars, prices):
            message += f"Similar deal:\n{similar}\nPrice: ${price:.2f}\n\n"
        return message

    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
        """
        Build the chat messages for the estimation call.

        The trailing assistant message ("Fair price estimate: $") primes the
        model to reply with just a number.
        """
        system_message = "You estimate fair market prices for travel deals. Reply only with the price estimate, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "Now estimate the fair market price for:\n\n"
        user_prompt += description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Fair price estimate: $"}
        ]

    def find_similars(self, description: str):
        """Return (documents, prices) of the 5 nearest stored deals."""
        self.log("Travel Estimator searching for similar deals")
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        # Fix: removed the pointless [:] copies of the result lists
        documents = results['documents'][0]
        prices = [m['price'] for m in results['metadatas'][0]]
        self.log("Travel Estimator found similar deals")
        return documents, prices

    def get_price(self, s) -> float:
        """Extract the first number from *s* ('$1,234.56' -> 1234.56); 0.0 if none."""
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def estimate(self, description: str) -> float:
        """Estimate a fair price for *description* using similar deals + the LLM."""
        documents, prices = self.find_similars(description)
        self.log(f"Travel Estimator calling {self.MODEL}")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=10
        )
        reply = response.choices[0].message.content
        result = self.get_price(reply)
        self.log(f"Travel Estimator complete - ${result:.2f}")
        return result
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import os
|
||||
import sys
|
||||
import http.client
|
||||
import urllib
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.agent import Agent
|
||||
from helpers.travel_deals import TravelOpportunity
|
||||
|
||||
# Master switch: set False to disable Pushover notifications entirely
DO_PUSH = True
|
||||
|
||||
class TravelMessagingAgent(Agent):
    """
    Agent that sends push notifications about travel deals via Pushover.
    """

    name = "Travel Messenger"
    color = Agent.WHITE

    def __init__(self):
        """Read Pushover credentials from the environment when pushes are enabled."""
        self.log("Travel Messenger initializing")
        if DO_PUSH:
            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-token-if-not-using-env')
            self.log("Travel Messenger has initialized Pushover")

    def push(self, text):
        """
        Send *text* as a Pushover push notification.

        Logs (rather than raises) on a non-200 response so a messaging
        failure never aborts the surrounding agent run.
        """
        # Import explicitly: the module header only does `import urllib`,
        # which does not guarantee the `urllib.parse` submodule is bound.
        from urllib.parse import urlencode
        self.log("Travel Messenger sending push notification")
        conn = http.client.HTTPSConnection("api.pushover.net:443")
        try:
            conn.request("POST", "/1/messages.json",
                urlencode({
                    "token": self.pushover_token,
                    "user": self.pushover_user,
                    "message": text,
                    "sound": "cashregister"
                }), { "Content-type": "application/x-www-form-urlencoded" })
            response = conn.getresponse()
            # Fix: the response status was previously read and discarded
            if response.status != 200:
                self.log(f"Pushover returned status {response.status}")
        finally:
            # Fix: the connection was previously leaked (never closed)
            conn.close()

    def alert(self, opportunity: TravelOpportunity):
        """Format *opportunity* as a short alert message and push it (if DO_PUSH)."""
        text = f"Travel Deal! {opportunity.deal.destination} - "
        text += f"Price=${opportunity.deal.price:.2f}, "
        text += f"Est=${opportunity.estimate:.2f}, "
        text += f"Save ${opportunity.discount:.2f}! "
        text += opportunity.deal.url
        if DO_PUSH:
            self.push(text)
        self.log("Travel Messenger completed")
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, List
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.agent import Agent
|
||||
from helpers.travel_deals import TravelDeal, TravelOpportunity
|
||||
from agents.travel_scanner_agent import TravelScannerAgent
|
||||
from agents.travel_estimator_agent import TravelEstimatorAgent
|
||||
from agents.travel_messaging_agent import TravelMessagingAgent
|
||||
|
||||
|
||||
class TravelPlanningAgent(Agent):
    """
    Coordinates the scanner, estimator and messenger agents to find and
    alert on travel deals whose estimated value exceeds the asking price.
    """

    name = "Travel Planner"
    color = Agent.GREEN
    # Minimum discount (estimate - price, in dollars) for a deal to be surfaced
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """Construct the three collaborating agents around *collection*."""
        self.log("Travel Planner initializing")
        self.scanner = TravelScannerAgent()
        self.estimator = TravelEstimatorAgent(collection)
        self.messenger = TravelMessagingAgent()
        self.log("Travel Planner ready")

    def evaluate(self, deal: TravelDeal) -> TravelOpportunity:
        """Estimate a fair price for *deal* and wrap it as a TravelOpportunity."""
        self.log(f"Travel Planner evaluating {deal.destination}")
        estimate = self.estimator.estimate(deal.description)
        discount = estimate - deal.price
        self.log(f"Travel Planner found discount ${discount:.2f}")
        return TravelOpportunity(deal=deal, estimate=estimate, discount=discount)

    def plan(self, memory: Optional[List] = None) -> Optional[List[TravelOpportunity]]:
        """
        Run one scan/estimate/alert cycle.

        :param memory: previously surfaced opportunities, used by the scanner
            to skip already-seen URLs. NOTE(review): callers pass
            TravelOpportunity objects, not strings as previously annotated.
        :return: opportunities above DEAL_THRESHOLD, best first, or None.
        """
        # Fix: was a mutable default argument (memory=[]) shared across calls
        if memory is None:
            memory = []
        self.log("Travel Planner starting run")
        selection = self.scanner.scan(memory=memory)
        if selection and selection.deals:
            # Evaluate at most 5 deals to bound LLM cost
            opportunities = [self.evaluate(deal) for deal in selection.deals[:5]]
            if not opportunities:
                self.log("Travel Planner found no valid opportunities")
                return None
            opportunities.sort(key=lambda opp: opp.discount, reverse=True)
            good_deals = [opp for opp in opportunities if opp.discount > self.DEAL_THRESHOLD]
            if good_deals:
                best = good_deals[0]
                self.log(f"Travel Planner found {len(good_deals)} deals above threshold, best: ${best.discount:.2f} off")
                self.messenger.alert(best)
                self.log("Travel Planner completed")
                return good_deals
            else:
                self.log(f"Travel Planner completed - no deals above ${self.DEAL_THRESHOLD} threshold")
                return None
        self.log("Travel Planner found no deals to evaluate")
        return None
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, List
|
||||
from openai import OpenAI
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.agent import Agent
|
||||
from helpers.travel_deals import ScrapedTravelDeal, TravelDealSelection
|
||||
|
||||
|
||||
class TravelScannerAgent(Agent):
    """
    Agent that fetches scraped travel deals and uses an OpenAI structured
    output call to select and summarize the 5 most promising ones.
    """

    MODEL = "gpt-4o-mini"

    SYSTEM_PROMPT = """You identify and summarize the 5 most promising travel deals from a list.
Focus on deals with destinations, deal types (flight/hotel/package), and detailed descriptions.
If price is mentioned, extract it. If no specific price is given but there's a discount mentioned (e.g. "30% off"), estimate a reasonable price.
If absolutely no pricing information exists, use a placeholder price of 500.
Respond strictly in JSON with no explanation.

{"deals": [
    {
        "destination": "City or Country name",
        "deal_type": "Flight, Hotel, or Package",
        "description": "4-5 sentences describing the travel deal, dates, what's included, and key highlights",
        "price": 499.99,
        "url": "the url as provided"
    },
    ...
]}"""

    USER_PROMPT_PREFIX = """Respond with the 5 most promising travel deals with destinations, types, and descriptions.
Respond strictly in JSON. Provide detailed descriptions focusing on what travelers get.
Extract the destination and deal type (Flight/Hotel/Package) from the title and description.
For pricing: extract exact prices if available, estimate from percentage discounts, or use 500 as placeholder.

Travel Deals:

"""

    USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON with exactly 5 deals."

    name = "Travel Scanner"
    color = Agent.CYAN

    def __init__(self):
        """Set up the OpenAI client."""
        self.log("Travel Scanner is initializing")
        self.openai = OpenAI()
        self.log("Travel Scanner is ready")

    def fetch_deals(self, memory) -> List[ScrapedTravelDeal]:
        """
        Scrape deals from the feeds, excluding URLs already seen.

        :param memory: prior opportunities; their deal.url values are skipped.
        """
        self.log("Travel Scanner fetching deals from RSS feeds")
        urls = [opp.deal.url for opp in memory]
        scraped = ScrapedTravelDeal.fetch()
        result = [scrape for scrape in scraped if scrape.url not in urls]
        self.log(f"Travel Scanner found {len(result)} new deals")
        return result

    def make_user_prompt(self, scraped) -> str:
        """Assemble the user prompt from the scraped deal descriptions."""
        user_prompt = self.USER_PROMPT_PREFIX
        user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
        user_prompt += self.USER_PROMPT_SUFFIX
        return user_prompt

    def scan(self, memory: Optional[List] = None) -> Optional[TravelDealSelection]:
        """
        Fetch new deals and have the LLM curate them into a selection.

        :param memory: previously surfaced opportunities whose URLs are skipped.
        :return: selection containing only positively-priced deals, or None.
        """
        # Fix: was a mutable default argument (memory=[]) shared across calls
        if memory is None:
            memory = []
        scraped = self.fetch_deals(memory)
        if not scraped:
            return None
        user_prompt = self.make_user_prompt(scraped)
        self.log("Travel Scanner calling OpenAI")
        result = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            response_format=TravelDealSelection
        )
        selection = result.choices[0].message.parsed
        # Drop any deal without a usable (positive) price
        selection.deals = [deal for deal in selection.deals if deal.price > 0]
        self.log(f"Travel Scanner received {len(selection.deals)} valid deals")
        return selection if selection.deals else None
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import joblib
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import xgboost as xgb
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
class TravelXGBoostAgent(Agent):
    """
    Price estimator backed by an XGBoost regressor trained on the
    sentence-transformer embeddings stored in the Chroma collection.
    """

    name = "XGBoost Estimator"
    color = Agent.GREEN

    def __init__(self, collection):
        """Load a saved model from disk, or train and persist a new one."""
        self.log("XGBoost Estimator initializing")
        self.collection = collection
        self.model_path = os.path.join(w8d5_path, 'helpers', 'travel_xgboost_model.pkl')
        self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

        if os.path.exists(self.model_path):
            self.log("Loading existing XGBoost model")
            self.model = joblib.load(self.model_path)
        else:
            self.log("Training new XGBoost model")
            self.model = self._train_model()
            joblib.dump(self.model, self.model_path)
            self.log(f"XGBoost model saved to {self.model_path}")

        self.log("XGBoost Estimator ready")

    def _train_model(self):
        """Train an XGBoost regressor mapping embeddings to prices."""
        self.log("Fetching training data from ChromaDB")
        data = self.collection.get(include=['embeddings', 'metadatas'])

        features = np.array(data['embeddings'])
        targets = np.array([meta['price'] for meta in data['metadatas']])

        self.log(f"Training on {len(features)} samples")

        regressor = xgb.XGBRegressor(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            n_jobs=-1
        )

        regressor.fit(features, targets)
        self.log("XGBoost training complete")

        return regressor

    def estimate(self, description: str) -> float:
        """Return a non-negative predicted price for *description*."""
        self.log(f"XGBoost estimating price for: {description[:50]}...")

        # Embed and reshape to the (1, n_features) matrix XGBoost expects
        vector = self.embedder.encode([description])[0].reshape(1, -1)

        # Clamp to zero: a price can never be negative
        predicted = max(0, self.model.predict(vector)[0])

        self.log(f"XGBoost estimate: ${predicted:.2f}")
        return float(predicted)
|
||||
|
||||
@@ -0,0 +1,230 @@
|
||||
import os
|
||||
import random
|
||||
from dotenv import load_dotenv
|
||||
from huggingface_hub import login
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import chromadb
|
||||
from tqdm import tqdm
|
||||
|
||||
# Load environment variables (.env values override any already set)
load_dotenv(override=True)
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')

# Authenticate with Hugging Face (required to download models)
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)

# Chroma persistent store location and the deal categories generated below
DB = "travel_vectorstore"
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']

# Vocabulary pools used by the synthetic deal generators
AIRLINES = ['American Airlines', 'Delta', 'United', 'Southwest', 'JetBlue', 'Spirit', 'Frontier', 'Alaska Airlines', 'Emirates', 'British Airways', 'Air France', 'Lufthansa', 'Qatar Airways']
CITIES = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Miami', 'San Francisco', 'Boston', 'Seattle', 'Denver', 'Atlanta', 'Las Vegas', 'Orlando', 'Phoenix', 'London', 'Paris', 'Tokyo', 'Dubai', 'Singapore', 'Sydney', 'Rome']
HOTELS = ['Hilton', 'Marriott', 'Hyatt', 'Holiday Inn', 'Best Western', 'Sheraton', 'Ritz-Carlton', 'Four Seasons', 'Westin', 'Radisson']
CLASSES = ['Economy', 'Premium Economy', 'Business', 'First Class']
CAR_COMPANIES = ['Hertz', 'Enterprise', 'Avis', 'Budget', 'National', 'Alamo']
CAR_TYPES = ['Compact', 'Sedan', 'SUV', 'Luxury', 'Van']
|
||||
|
||||
def generate_flight_description():
    """
    Generate one synthetic flight deal.

    :return: (description, price) — price in whole dollars, higher for
        premium cabins.
    """
    airline = random.choice(AIRLINES)
    source = random.choice(CITIES)
    dest = random.choice([c for c in CITIES if c != source])
    flight_class = random.choice(CLASSES)
    stops = random.choice(['non-stop', 'one-stop', 'two-stops'])
    duration = f"{random.randint(1, 15)} hours {random.randint(0, 59)} minutes"

    description = f"{airline} {flight_class} {stops} flight from {source} to {dest}. "
    description += f"Flight duration approximately {duration}. "

    if random.random() > 0.5:
        bags = random.randint(1, 2)
        description += f"Includes {bags} checked bag"
        # Fix: pluralization now follows the bag count instead of an
        # independent coin flip (which could produce "1 checked bags")
        if bags > 1:
            description += "s"
        description += ". "

    if flight_class in ['Business', 'First Class']:
        description += random.choice(['Priority boarding included. ', 'Lounge access available. ', 'Lie-flat seats. '])

    # Premium cabins (including Premium Economy) are priced higher
    price = random.randint(150, 2500) if flight_class == 'Economy' else random.randint(800, 8000)
    return description, price
|
||||
|
||||
def generate_hotel_description():
    """Generate one synthetic hotel deal: (description, total_price)."""
    brand = random.choice(HOTELS)
    location = random.choice(CITIES)
    star_rating = random.randint(2, 5)
    room = random.choice(['Standard Room', 'Deluxe Room', 'Suite', 'Executive Suite'])
    stay_nights = random.randint(1, 7)

    text = f"{brand} {star_rating}-star hotel in {location}. {room} for {stay_nights} night"
    if stay_nights > 1:
        text += "s"
    text += ". "

    # Each amenity appears with probability 1 - threshold, drawn in this order
    included = []
    for threshold, amenity in ((0.3, 'Free WiFi'),
                               (0.5, 'Breakfast included'),
                               (0.6, 'Pool access'),
                               (0.7, 'Gym'),
                               (0.8, 'Spa services')):
        if random.random() > threshold:
            included.append(amenity)

    if included:
        text += f"Amenities: {', '.join(included)}. "

    # Budget/midscale hotels get cheaper nightly rates than upscale ones
    nightly = random.randint(80, 500) if star_rating <= 3 else random.randint(200, 1200)

    return text, nightly * stay_nights
|
||||
|
||||
def generate_car_rental_description():
    """Generate one synthetic car-rental deal: (description, total_price)."""
    agency = random.choice(CAR_COMPANIES)
    vehicle_class = random.choice(CAR_TYPES)
    location = random.choice(CITIES)
    rental_days = random.randint(1, 14)

    text = f"{agency} car rental in {location}. {vehicle_class} class vehicle for {rental_days} day"
    if rental_days > 1:
        text += "s"
    text += ". "

    if random.random() > 0.6:
        text += "Unlimited mileage included. "
    if random.random() > 0.5:
        text += "Airport pickup available. "
    if random.random() > 0.7:
        text += "GPS navigation included. "

    # Draw a daily rate for every class in fixed order (preserving the
    # original RNG sequence), then pick the one for the chosen class
    rate_ranges = {'Compact': (25, 45), 'Sedan': (35, 65), 'SUV': (50, 90),
                   'Luxury': (80, 200), 'Van': (60, 100)}
    rates = {kind: random.randint(low, high) for kind, (low, high) in rate_ranges.items()}

    return text, rates[vehicle_class] * rental_days
|
||||
|
||||
def generate_vacation_package_description():
    """
    Generate one synthetic all-inclusive vacation package deal.

    :return: (description, price) with price in whole dollars.
    """
    city = random.choice(CITIES)
    nights = random.randint(3, 10)

    description = f"All-inclusive vacation package to {city} for {nights} nights. "
    # Fix: the sentence is only left open when extras follow; previously a
    # package with no extras ended with a dangling "hotel accommodation, "
    description += f"Includes round-trip {random.choice(CLASSES)} flights, {random.choice(HOTELS)} hotel accommodation"

    extras = []
    if random.random() > 0.3:
        extras.append('daily breakfast')
    if random.random() > 0.5:
        extras.append('airport transfers')
    if random.random() > 0.6:
        extras.append('city tour')
    if random.random() > 0.7:
        extras.append('travel insurance')

    if extras:
        description += f", and {', '.join(extras)}. "
    else:
        description += ". "

    base_price = random.randint(800, 4000)
    return description, base_price
|
||||
|
||||
def generate_cruise_description():
    """Generate one synthetic cruise deal: (description, price)."""
    # Pick 2-4 regions for the itinerary (was wrapped in a one-element list)
    itinerary = ', '.join(random.sample(
        ['Caribbean', 'Mediterranean', 'Alaska', 'Hawaii', 'Baltic Sea', 'South Pacific'],
        k=random.randint(2, 4)))
    nights = random.choice([3, 5, 7, 10, 14])

    description = f"{nights}-night cruise visiting {itinerary}. "
    description += "All meals and entertainment included. "

    cabin = random.choice(['Interior cabin', 'Ocean view cabin', 'Balcony cabin', 'Suite'])
    description += f"{cabin}. "

    if random.random() > 0.5:
        description += "Unlimited beverage package available. "
    if random.random() > 0.6:
        description += "Shore excursions at each port. "

    return description, random.randint(500, 5000)
|
||||
|
||||
def generate_activity_description():
    """Generate one synthetic activity/excursion deal: (description, price)."""
    city = random.choice(CITIES)
    options = ['City sightseeing tour', 'Museum pass', 'Adventure sports package',
               'Wine tasting tour', 'Cooking class', 'Hot air balloon ride',
               'Snorkeling excursion', 'Helicopter tour', 'Spa day package',
               'Theme park tickets']
    chosen = random.choice(options)

    description = f"{chosen} in {city}. "

    # Tours get a duration; the other perks are coin flips
    if 'tour' in chosen.lower():
        description += f"Duration: {random.randint(2, 8)} hours. "
    if random.random() > 0.5:
        description += "Hotel pickup included. "
    if random.random() > 0.6:
        description += "Small group experience. "

    return description, random.randint(30, 500)
|
||||
|
||||
# Map each category to its generator function
GENERATORS = {
    'Flights': generate_flight_description,
    'Hotels': generate_hotel_description,
    'Car_Rentals': generate_car_rental_description,
    'Vacation_Packages': generate_vacation_package_description,
    'Cruises': generate_cruise_description,
    'Activities': generate_activity_description
}

# --- Generate the synthetic dataset: 3334 deals per category (6 x 3334 ≈ 20k) ---
print("Generating synthetic travel dataset...")
travel_data = []  # list of (description, price, category) tuples

items_per_category = 3334
for category in CATEGORIES:
    print(f"Generating {category}...")
    generator = GENERATORS[category]
    for _ in range(items_per_category):
        description, price = generator()
        travel_data.append((description, float(price), category))

# Shuffle so the insert batches below are category-mixed
random.shuffle(travel_data)
print(f"Generated {len(travel_data)} travel deals")

# --- Embedding model and vector store ---
print("\nInitializing SentenceTransformer model...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

print(f"Connecting to ChromaDB at {DB}...")
client = chromadb.PersistentClient(path=DB)

# Recreate the collection from scratch on every run
collection_name = "travel_deals"
existing_collections = [col.name for col in client.list_collections()]
if collection_name in existing_collections:
    client.delete_collection(collection_name)
    print(f"Deleted existing collection: {collection_name}")

collection = client.create_collection(collection_name)
print(f"Created new collection: {collection_name}")

# --- Embed and insert in batches of 1000 to keep each add call bounded ---
print("\nCreating embeddings and adding to ChromaDB...")
for i in tqdm(range(0, len(travel_data), 1000)):
    batch = travel_data[i:i+1000]
    documents = [desc for desc, _, _ in batch]
    vectors = model.encode(documents).astype(float).tolist()
    metadatas = [{"category": cat, "price": price} for _, price, cat in batch]
    # Stable ids "travel_<global index>" so re-runs produce the same ids
    ids = [f"travel_{j}" for j in range(i, i+len(batch))]

    collection.add(
        ids=ids,
        documents=documents,
        embeddings=vectors,
        metadatas=metadatas
    )

# --- Sanity report: counts and price statistics ---
total_items = collection.count()
print(f"\nVectorstore created successfully with {total_items} travel deals")

result = collection.get(include=['metadatas'], limit=total_items)
categories = [m['category'] for m in result['metadatas']]
prices = [m['price'] for m in result['metadatas']]
category_counts = {}
for cat in categories:
    category_counts[cat] = category_counts.get(cat, 0) + 1

print("\nCategory distribution:")
for category, count in sorted(category_counts.items()):
    print(f"  {category}: {count}")

avg_price = sum(prices) / len(prices) if prices else 0
print(f"\nAverage price: ${avg_price:.2f}")
print(f"Price range: ${min(prices):.2f} - ${max(prices):.2f}")
|
||||
@@ -0,0 +1,99 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Optional
|
||||
from dotenv import load_dotenv
|
||||
import chromadb
|
||||
import numpy as np
|
||||
from sklearn.manifold import TSNE
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.travel_planning_agent import TravelPlanningAgent
|
||||
from helpers.travel_deals import TravelOpportunity
|
||||
|
||||
# ANSI codes for the framework's own log banner
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'

# Category labels and matching plot colors (parallel lists, same order)
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']
COLORS = ['red', 'blue', 'green', 'orange', 'purple', 'cyan']
|
||||
|
||||
def init_logging():
    """
    Configure the root logger to emit INFO-level records to stdout with a
    timestamped "[Travel Agents]" prefix.

    Safe to call more than once: a repeat call is a no-op. (Fix: the
    original attached a fresh StreamHandler on every call, multiplying
    every log line.)
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Idempotence guard: skip if our handler is already attached
    if any(getattr(h, "_travel_agents_handler", False) for h in root.handlers):
        return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "[%(asctime)s] [Travel Agents] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    handler.setFormatter(formatter)
    handler._travel_agents_handler = True  # marker used by the guard above
    root.addHandler(handler)
|
||||
|
||||
class TravelDealFramework:
    """
    Top-level orchestration: owns the Chroma collection, the persistent
    memory of past opportunities, and the (lazily created) planning agent.
    """

    DB = "travel_vectorstore"
    MEMORY_FILENAME = "travel_memory.json"

    def __init__(self):
        """Set up logging and env, open the vector store, load saved memory."""
        init_logging()
        load_dotenv()
        client = chromadb.PersistentClient(path=self.DB)
        self.memory = self.read_memory()
        self.collection = client.get_or_create_collection('travel_deals')
        self.planner = None  # created lazily by init_agents_as_needed

    def init_agents_as_needed(self):
        """Construct the planning agent on first use."""
        if not self.planner:
            self.log("Initializing Travel Agent Framework")
            self.planner = TravelPlanningAgent(self.collection)
            self.log("Travel Agent Framework ready")

    def read_memory(self) -> List[TravelOpportunity]:
        """Load previously surfaced opportunities from disk, or [] if none."""
        if not os.path.exists(self.MEMORY_FILENAME):
            return []
        with open(self.MEMORY_FILENAME, "r") as file:
            raw = json.load(file)
        return [TravelOpportunity(**entry) for entry in raw]

    def write_memory(self) -> None:
        """Persist the current memory to MEMORY_FILENAME as JSON."""
        serialized = [opportunity.dict() for opportunity in self.memory]
        with open(self.MEMORY_FILENAME, "w") as file:
            json.dump(serialized, file, indent=2)

    def log(self, message: str):
        """INFO-log *message* with the framework's color banner."""
        logging.info(BG_BLUE + WHITE + "[Travel Framework] " + message + RESET)

    def run(self) -> List[TravelOpportunity]:
        """Run one planning cycle, persist any new opportunities, return memory."""
        self.init_agents_as_needed()
        logging.info("Starting Travel Planning Agent")
        results = self.planner.plan(memory=self.memory)
        logging.info(f"Travel Planning Agent completed with {len(results) if results else 0} results")
        if results:
            self.memory.extend(results)
            self.write_memory()
        return self.memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        """Fetch up to *max_datapoints* embeddings and t-SNE-reduce them to 3D."""
        client = chromadb.PersistentClient(path=cls.DB)
        collection = client.get_or_create_collection('travel_deals')
        result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        vectors = np.array(result['embeddings'])
        documents = result['documents']
        categories = [metadata['category'] for metadata in result['metadatas']]
        colors = [COLORS[CATEGORIES.index(c)] for c in categories]
        tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
        reduced_vectors = tsne.fit_transform(vectors)
        return documents, reduced_vectors, colors
|
||||
|
||||
# Script entry point: run one full deal-finding cycle
if __name__=="__main__":
    TravelDealFramework().run()
|
||||
|
||||
67
week8/community_contributions/w8d5/helpers/travel_deals.py
Normal file
67
week8/community_contributions/w8d5/helpers/travel_deals.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Self
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import feedparser
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
import time
|
||||
|
||||
# RSS feeds scanned for travel deals
feeds = [
    "https://thepointsguy.com/feed/",
]
|
||||
|
||||
def extract(html_snippet: str) -> str:
    """Strip HTML markup from *html_snippet* and collapse it to one line."""
    parsed = BeautifulSoup(html_snippet, 'html.parser')
    cleaned = parsed.get_text(strip=True)
    # Belt-and-braces: remove any residual tag-like fragments
    cleaned = re.sub('<[^<]+?>', '', cleaned)
    return cleaned.replace('\n', ' ').strip()
|
||||
|
||||
class ScrapedTravelDeal:
    """
    One raw deal entry scraped from an RSS feed, before LLM curation.
    """

    # Attributes populated from the feed entry in __init__
    title: str
    summary: str
    url: str
    details: str

    def __init__(self, entry: Dict[str, str]):
        """Build from a feedparser entry dict (title/summary/link keys)."""
        self.title = entry.get('title', '')
        raw_summary = entry.get('summary', entry.get('description', ''))
        self.summary = extract(raw_summary)
        self.url = entry.get('link', '')
        self.details = self.summary

    def __repr__(self):
        return f"<{self.title}>"

    def describe(self):
        """Return a plain-text rendering suitable for an LLM prompt."""
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls, show_progress: bool = False) -> List[Self]:
        """
        Scrape up to 10 entries from each configured feed.

        A failing feed is logged and skipped rather than aborting the run.
        """
        deals = []
        sources = tqdm(feeds) if show_progress else feeds
        for feed_url in sources:
            try:
                parsed_feed = feedparser.parse(feed_url)
                for entry in parsed_feed.entries[:10]:
                    deals.append(cls(entry))
                    # brief pause between entries to be polite to the server
                    time.sleep(0.3)
            except Exception as e:
                print(f"Error fetching {feed_url}: {e}")
        return deals
|
||||
|
||||
class TravelDeal(BaseModel):
    """One curated travel deal, produced as structured output by the scanner LLM."""
    destination: str  # city or country name
    deal_type: str  # "Flight", "Hotel", or "Package"
    description: str  # several sentences summarizing the deal
    price: float  # quoted or LLM-estimated price in dollars
    url: str  # link to the original deal
||||
|
||||
class TravelDealSelection(BaseModel):
    """Container for the scanner's structured-output response."""
    deals: List[TravelDeal]  # the curated deals
||||
|
||||
class TravelOpportunity(BaseModel):
    """A deal paired with an independent price estimate and implied discount."""
    deal: TravelDeal  # the underlying curated deal
    estimate: float  # estimated fair market price
    discount: float  # estimate - deal.price; positive means a bargain
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Tuple
|
||||
from dotenv import load_dotenv
|
||||
import chromadb
|
||||
import numpy as np
|
||||
from sklearn.manifold import TSNE
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
|
||||
from agents.travel_scanner_agent import TravelScannerAgent
|
||||
from agents.travel_estimator_agent import TravelEstimatorAgent
|
||||
from agents.travel_xgboost_agent import TravelXGBoostAgent
|
||||
from agents.travel_messaging_agent import TravelMessagingAgent
|
||||
from helpers.travel_deals import TravelOpportunity, TravelDeal
|
||||
|
||||
# ANSI codes for the framework's own log banner
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'

# Category labels and matching plot colors (parallel lists, same order)
CATEGORIES = ['Flights', 'Hotels', 'Car_Rentals', 'Vacation_Packages', 'Cruises', 'Activities']
COLORS = ['red', 'blue', 'green', 'orange', 'purple', 'cyan']
|
||||
|
||||
def init_logging():
    """
    Configure the root logger to emit INFO-level records to stdout with a
    timestamped "[Travel Agents]" prefix.

    Safe to call more than once: a repeat call is a no-op. (Fix: the
    original attached a fresh StreamHandler on every call, multiplying
    every log line.)
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # Idempotence guard: skip if our handler is already attached
    if any(getattr(h, "_travel_agents_handler", False) for h in root.handlers):
        return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "[%(asctime)s] [Travel Agents] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    handler.setFormatter(formatter)
    handler._travel_agents_handler = True  # marker used by the guard above
    root.addHandler(handler)
|
||||
|
||||
|
||||
class TravelDualFramework:
    """
    Orchestrates the dual-estimation pipeline: scans RSS feeds for travel
    deals, prices each deal with both an LLM-based estimator and an XGBoost
    estimator, and records (and alerts on) any deal whose estimated savings
    clear DEAL_THRESHOLD. Each estimator keeps its own opportunity memory.
    """

    # Chroma vectorstore directory and the two JSON memory files
    DB = "travel_vectorstore"
    LLM_MEMORY_FILE = "travel_memory_llm.json"
    XGB_MEMORY_FILE = "travel_memory_xgb.json"
    # Minimum estimated savings (USD) for a deal to count as an opportunity
    DEAL_THRESHOLD = 200.0

    def __init__(self):
        """Set up logging, env vars, the vectorstore collection and saved memories.

        Agents are created lazily by init_agents_as_needed() because they are
        expensive to construct (model load, API clients).
        """
        init_logging()
        load_dotenv()
        client = chromadb.PersistentClient(path=self.DB)
        self.collection = client.get_or_create_collection('travel_deals')

        self.llm_memory = self.read_memory(self.LLM_MEMORY_FILE)
        self.xgb_memory = self.read_memory(self.XGB_MEMORY_FILE)

        self.scanner = None
        self.llm_estimator = None
        self.xgb_estimator = None
        self.messenger = None

    def init_agents_as_needed(self):
        """Lazily construct all agents on first use; idempotent on later calls."""
        if not self.scanner:
            self.log("Initializing Travel Dual Estimation Framework")
            self.scanner = TravelScannerAgent()
            self.llm_estimator = TravelEstimatorAgent(self.collection)
            self.xgb_estimator = TravelXGBoostAgent(self.collection)
            self.messenger = TravelMessagingAgent()
            self.log("Travel Dual Framework ready")

    def read_memory(self, filename: str) -> List[TravelOpportunity]:
        """Load previously found opportunities from a JSON file; [] if the file is absent."""
        if os.path.exists(filename):
            with open(filename, "r") as file:
                data = json.load(file)
            return [TravelOpportunity(**item) for item in data]
        return []

    def write_memory(self, opportunities: List[TravelOpportunity], filename: str) -> None:
        """Persist the full opportunity list to a JSON file (overwrites)."""
        data = [opportunity.dict() for opportunity in opportunities]
        with open(filename, "w") as file:
            json.dump(data, file, indent=2)

    def log(self, message: str):
        """Log an info message with the framework's colour-coded prefix."""
        text = BG_BLUE + WHITE + "[Dual Framework] " + message + RESET
        logging.info(text)

    def _evaluate(self, deal, estimator, label: str):
        """Estimate one deal with the given estimator.

        Returns a TravelOpportunity (after logging and sending an alert) when
        the discount clears DEAL_THRESHOLD, otherwise None. `label` is the
        estimator name used in log messages ("LLM" or "XGBoost").
        """
        estimate = estimator.estimate(deal.description)
        discount = estimate - deal.price
        if discount < self.DEAL_THRESHOLD:
            return None
        opportunity = TravelOpportunity(
            deal=deal,
            estimate=estimate,
            discount=discount
        )
        self.log(f"{label} found opportunity: {deal.destination} - ${discount:.0f} savings")
        self.messenger.alert(opportunity)
        return opportunity

    def run(self) -> Tuple[List[TravelOpportunity], List[TravelOpportunity]]:
        """Run one scan cycle and return the updated (llm_memory, xgb_memory)."""
        self.init_agents_as_needed()

        self.log("Starting dual estimation scan")
        deal_selection = self.scanner.scan()

        if not deal_selection or not deal_selection.deals:
            self.log("No deals found")
            return self.llm_memory, self.xgb_memory

        deals = deal_selection.deals
        self.log(f"Processing {len(deals)} deals with both estimators")

        llm_opportunities = []
        xgb_opportunities = []

        # Evaluate every deal with both estimators; each keeps its own list
        for deal in deals:
            llm_opp = self._evaluate(deal, self.llm_estimator, "LLM")
            if llm_opp:
                llm_opportunities.append(llm_opp)

            xgb_opp = self._evaluate(deal, self.xgb_estimator, "XGBoost")
            if xgb_opp:
                xgb_opportunities.append(xgb_opp)

        # Persist only when something new was found
        if llm_opportunities:
            self.llm_memory.extend(llm_opportunities)
            self.write_memory(self.llm_memory, self.LLM_MEMORY_FILE)

        if xgb_opportunities:
            self.xgb_memory.extend(xgb_opportunities)
            self.write_memory(self.xgb_memory, self.XGB_MEMORY_FILE)

        self.log(f"Scan complete: {len(llm_opportunities)} LLM, {len(xgb_opportunities)} XGBoost opportunities")

        return self.llm_memory, self.xgb_memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        """Fetch up to max_datapoints embeddings and reduce them to 3D with t-SNE.

        Returns (documents, reduced_vectors, colors, categories). On an empty
        collection, returns an empty (0, 3) array instead of letting TSNE
        raise on zero samples — this makes create_3d_plot's empty-state
        branch actually reachable.
        """
        client = chromadb.PersistentClient(path=cls.DB)
        collection = client.get_or_create_collection('travel_deals')
        result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        vectors = np.array(result['embeddings'])
        documents = result['documents']
        categories = [metadata['category'] for metadata in result['metadatas']]
        colors = [COLORS[CATEGORIES.index(c)] for c in categories]
        if vectors.size == 0:
            # t-SNE cannot run with no samples; hand back an empty 3D array
            return documents, np.empty((0, 3)), colors, categories
        tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
        reduced_vectors = tsne.fit_transform(vectors)
        return documents, reduced_vectors, colors, categories
|
||||
|
||||
|
||||
# Run a single scan cycle when executed directly (no UI).
if __name__=="__main__":
    framework = TravelDualFramework()
    framework.run()
|
||||
|
||||
66
week8/community_contributions/w8d5/tests/test_components.py
Normal file
66
week8/community_contributions/w8d5/tests/test_components.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# Smoke-test script: exercises each component of the travel deal hunter in
# turn, printing a pass/fail style line per step. Network and API access are
# exercised for real; failures are printed, not raised.
import os
import sys
from dotenv import load_dotenv

# Make the project root (and the repo directory two levels up) importable.
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))

from helpers.travel_deals import ScrapedTravelDeal
from agents.travel_scanner_agent import TravelScannerAgent
from agents.travel_estimator_agent import TravelEstimatorAgent

load_dotenv()

print("\nTesting Travel Deal Hunter Components\n")

# Step 1: pull raw deals from the RSS feeds.
print("1. RSS Feed Scraping")
deals = ScrapedTravelDeal.fetch(show_progress=False)
print(f"Fetched {len(deals)} deals from RSS feeds")
if deals:
    print(f"Sample: {deals[0].title[:60]}...")


# Step 2: check the OpenAI API key is configured.
print("\n2. OpenAI Connection")
if os.getenv("OPENAI_API_KEY"):
    print("OPENAI_API_KEY found")
else:
    print("OPENAI_API_KEY not found - set in .env file")

# Step 3: scanner agent construction.
print("\n3. Scanner Agent")
scanner = TravelScannerAgent()
print("Scanner agent initialized")

# Step 4: run a full scan with empty memory.
print("\n4. Deal Scanning")
try:
    selection = scanner.scan(memory=[])
    if selection and selection.deals:
        print(f"Scanner found {len(selection.deals)} processed deals")
        print(f"Sample: {selection.deals[0].destination} - ${selection.deals[0].price}")
    else:
        print("No deals returned")
except Exception as e:
    print(f"Error: {e}")

# Step 5: open (or create) the persistent Chroma collection.
print("\n5. ChromaDB Access")
import chromadb
try:
    db_path = "travel_vectorstore"
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_or_create_collection('travel_deals')
    count = collection.count()
    print(f"ChromaDB connected - {count} travel items in collection")
except Exception as e:
    print(f"Error: {e}")

# Step 6: price one sample description via the RAG estimator.
# NOTE(review): 'collection' is only bound if step 5 succeeded; if ChromaDB
# setup raised, this NameErrors and is reported as an Error below.
print("\n6. Estimator Check using travel vectorstore")
try:
    estimator = TravelEstimatorAgent(collection)
    sample = "Non-stop economy flight from New York to London, duration 7 hours"
    estimate = estimator.estimate(sample)
    print(f"Estimate: ${estimate:.2f}")
except Exception as e:
    print(f"Error: {e}")

print("\nComponent tests complete")
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# Side-by-side comparison of the two price estimators (LLM vs XGBoost) on a
# fixed set of travel deal descriptions, printed as an aligned table.
import os
import sys
from dotenv import load_dotenv

# Make the project root (and the repo directory two levels up) importable.
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))

from agents.travel_estimator_agent import TravelEstimatorAgent
from agents.travel_xgboost_agent import TravelXGBoostAgent
import chromadb

load_dotenv()

print("\nTesting Dual Estimation (LLM vs XGBoost)\n")

# NOTE(review): get_collection (unlike get_or_create_collection) raises if
# the vectorstore has not been built yet — intentional for this test.
client = chromadb.PersistentClient(path='travel_vectorstore')
collection = client.get_collection('travel_deals')

print("Initializing agents...")
llm_agent = TravelEstimatorAgent(collection)
xgb_agent = TravelXGBoostAgent(collection)

# One representative description per deal category
test_cases = [
    "Round trip flight from New York to London, Economy class, non-stop",
    "5-star Marriott hotel in Paris, 3 nights, Suite with breakfast included",
    "7-night Caribbean cruise, Balcony cabin, all meals included",
    "Hertz SUV rental in Los Angeles for 5 days with unlimited mileage",
    "All-inclusive vacation package to Dubai for 7 nights with Business class flights"
]

# Table header
print("\n" + "="*80)
print(f"{'Travel Deal Description':<60} {'LLM Est.':<12} {'XGB Est.':<12}")
print("="*80)

for desc in test_cases:
    llm_est = llm_agent.estimate(desc)
    xgb_est = xgb_agent.estimate(desc)

    # Truncate long descriptions so table columns stay aligned
    short_desc = desc[:57] + "..." if len(desc) > 60 else desc
    print(f"{short_desc:<60} ${llm_est:>9.2f} ${xgb_est:>9.2f}")

print("="*80)
print("\nDual estimation test complete!")
print("\nKey Observations:")
print("- LLM: Uses semantic understanding + RAG context")
print("- XGBoost: Uses pattern recognition from embeddings")
print("- Both trained on same 20K travel deals dataset")
|
||||
|
||||
38
week8/community_contributions/w8d5/tests/test_pipeline.py
Normal file
38
week8/community_contributions/w8d5/tests/test_pipeline.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# End-to-end run of the (single-estimator) travel deal pipeline: builds the
# framework, runs one iteration, and prints the most recent opportunity.
import os
import sys
from dotenv import load_dotenv

# Make the project root (and the repo directory two levels up) importable.
project_root = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, '..', '..'))

from helpers.travel_deal_framework import TravelDealFramework

load_dotenv()

print("\nTesting Full Travel Deal Pipeline\n")

print("Initializing framework...")
framework = TravelDealFramework()
framework.init_agents_as_needed()

print("\nRunning one iteration...")
try:
    result = framework.run()
    print(f"\nPipeline completed")
    print(f"Memory now has {len(result)} opportunities")
    if result:
        # Show the most recently appended opportunity
        latest = result[-1]
        print(f"\nLatest opportunity:")
        print(f" Destination: {latest.deal.destination}")
        print(f" Type: {latest.deal.deal_type}")
        print(f" Price: ${latest.deal.price:.2f}")
        print(f" Estimate: ${latest.estimate:.2f}")
        print(f" Discount: ${latest.discount:.2f}")
except Exception as e:
    # Print the error and full traceback but exit cleanly
    print(f"\nError during pipeline: {e}")
    import traceback
    traceback.print_exc()

print("\n")
|
||||
|
||||
306
week8/community_contributions/w8d5/w8d5_dual.py
Normal file
306
week8/community_contributions/w8d5/w8d5_dual.py
Normal file
@@ -0,0 +1,306 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import gradio as gr
|
||||
import plotly.graph_objects as go
|
||||
|
||||
w8d5_path = os.path.abspath(os.path.dirname(__file__))
|
||||
week8_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
if w8d5_path not in sys.path:
|
||||
sys.path.insert(0, w8d5_path)
|
||||
if week8_path not in sys.path:
|
||||
sys.path.insert(0, week8_path)
|
||||
|
||||
from log_utils import reformat
|
||||
from helpers.travel_dual_framework import TravelDualFramework
|
||||
from helpers.travel_deals import TravelOpportunity, TravelDeal
|
||||
|
||||
|
||||
class QueueHandler(logging.Handler):
    """Logging handler that pushes each formatted record onto a queue.

    The Gradio UI drains the queue periodically to display live agent logs.
    """

    def __init__(self, log_queue):
        """Remember the destination queue; `log_queue` must support put()."""
        super().__init__()
        self.log_queue = log_queue

    def emit(self, record):
        """Format the record with the attached formatter and enqueue the text."""
        formatted = self.format(record)
        self.log_queue.put(formatted)
|
||||
|
||||
|
||||
# Route every log record through a queue so the Gradio UI can render them.
log_queue = queue.Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setFormatter(
    logging.Formatter(
        "[%(asctime)s] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
)
logging.getLogger().addHandler(queue_handler)
logging.getLogger().setLevel(logging.INFO)

# Build the framework (and all its agents) once, at module import time.
agent_framework = TravelDualFramework()
agent_framework.init_agents_as_needed()

# Seconds between automatic background scans (5 minutes)
CHECK_INTERVAL = 300
|
||||
|
||||
|
||||
def run_agent_framework():
    """Background worker loop: run one scan, sleep CHECK_INTERVAL seconds, repeat forever.

    Exceptions from a scan are logged and swallowed so a single failure
    does not kill the daemon thread.
    """
    while True:
        try:
            agent_framework.run()
        except Exception as e:
            # Log and continue — keep the background loop alive
            logging.error(f"Error in agent framework: {e}")
        time.sleep(CHECK_INTERVAL)
|
||||
|
||||
|
||||
# Daemon thread: the background scanner dies automatically with the process.
framework_thread = threading.Thread(target=run_agent_framework, daemon=True)
framework_thread.start()
|
||||
|
||||
|
||||
def get_llm_table(llm_opps):
    """Render LLM opportunities as Dataframe rows.

    Each row: [destination, deal type, price, LLM estimate, savings, URL],
    with money formatted as $X.XX and URLs longer than 50 chars truncated.
    """
    rows = []
    for opp in llm_opps:
        url = opp.deal.url
        display_url = url[:50] + "..." if len(url) > 50 else url
        rows.append([
            opp.deal.destination,
            opp.deal.deal_type,
            f"${opp.deal.price:.2f}",
            f"${opp.estimate:.2f}",
            f"${opp.discount:.2f}",
            display_url,
        ])
    return rows
|
||||
|
||||
|
||||
def get_xgb_table(xgb_opps):
    """Render XGBoost opportunities as Dataframe rows.

    Each row: [destination, deal type, price, XGB estimate, savings, URL],
    with money formatted as $X.XX and URLs longer than 50 chars truncated.
    """
    def as_row(opp):
        # Keep short URLs intact; truncate anything over 50 characters
        link = opp.deal.url if len(opp.deal.url) <= 50 else opp.deal.url[:50] + "..."
        return [
            opp.deal.destination,
            opp.deal.deal_type,
            f"${opp.deal.price:.2f}",
            f"${opp.estimate:.2f}",
            f"${opp.discount:.2f}",
            link,
        ]

    return [as_row(opp) for opp in xgb_opps]
|
||||
|
||||
|
||||
# Rolling buffer of reformatted log lines for the UI (only the last 50 are rendered)
log_data = []
|
||||
|
||||
def update_ui():
    """Refresh callback for the Gradio timer and scan button.

    Returns (llm_table_rows, xgb_table_rows, logs_html, stats_line) built
    from the framework's current memories plus any queued log messages.
    """
    llm_data = get_llm_table(agent_framework.llm_memory)
    xgb_data = get_xgb_table(agent_framework.xgb_memory)

    # Drain pending log messages into the rolling buffer without blocking.
    # (log_data is only appended to, so no `global` statement is needed.)
    while not log_queue.empty():
        try:
            message = log_queue.get_nowait()
            log_data.append(reformat(message))
        except queue.Empty:
            # Raced with another consumer between empty() and get_nowait();
            # was a bare `except:` which would also have hidden real errors.
            break

    # Render only the most recent 50 lines in a scrollable terminal-style div
    logs_html = '<div style="height: 500px; overflow-y: auto; border: 1px solid #ccc; background-color: #1a1a1a; padding: 10px; font-family: monospace; font-size: 12px; color: #fff;">'
    logs_html += '<br>'.join(log_data[-50:])
    logs_html += '</div>'

    llm_count = len(agent_framework.llm_memory)
    xgb_count = len(agent_framework.xgb_memory)

    stats = f"LLM Opportunities: {llm_count} | XGBoost Opportunities: {xgb_count}"

    return llm_data, xgb_data, logs_html, stats
|
||||
|
||||
|
||||
def create_3d_plot():
    """Build a 3D scatter plot of the travel vectorstore embeddings (t-SNE reduced).

    Returns a plotly Figure. On any failure — or when the store is empty —
    returns a figure containing an explanatory annotation instead of raising,
    so the Gradio Plot component always has something valid to show.
    """
    try:
        documents, vectors, colors, categories = TravelDualFramework.get_plot_data(max_datapoints=5000)

        # Empty vectorstore: show a placeholder message instead of a plot
        if len(vectors) == 0:
            fig = go.Figure()
            fig.add_annotation(
                text="No data available yet. Vectorstore will load after initialization.",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=16)
            )
            return fig

        fig = go.Figure()

        # One trace per category so the legend can toggle each independently;
        # colors/categories are parallel lists, so index() recovers the colour
        unique_categories = list(set(categories))
        category_colors = {cat: colors[categories.index(cat)] for cat in unique_categories}

        for category in unique_categories:
            # Boolean mask selecting this category's rows of the vector array
            mask = [cat == category for cat in categories]
            cat_vectors = vectors[mask]

            fig.add_trace(go.Scatter3d(
                x=cat_vectors[:, 0],
                y=cat_vectors[:, 1],
                z=cat_vectors[:, 2],
                mode='markers',
                marker=dict(
                    size=3,
                    color=category_colors[category],
                    opacity=0.6
                ),
                name=category.replace('_', ' '),
                hovertemplate='<b>%{text}</b><extra></extra>',
                text=[category] * len(cat_vectors)
            ))

        fig.update_layout(
            title={
                'text': f'3D Travel Vectorstore Visualization ({len(vectors):,} deals)',
                'x': 0.5,
                'xanchor': 'center'
            },
            scene=dict(
                xaxis_title='Dimension 1',
                yaxis_title='Dimension 2',
                zaxis_title='Dimension 3',
                camera=dict(
                    eye=dict(x=1.5, y=1.5, z=1.5)
                )
            ),
            width=1200,
            height=600,
            margin=dict(r=0, b=0, l=0, t=40),
            showlegend=True,
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01
            )
        )

        return fig
    except Exception as e:
        # Convert any failure into a visible in-plot error message
        logging.error(f"Error creating 3D plot: {e}")
        fig = go.Figure()
        fig.add_annotation(
            text=f"Error loading plot: {str(e)}",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="red")
        )
        return fig
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Gradio UI: header, stats/scan row, two opportunity tables, 3D plot + logs.
# Wiring: initial load populates everything; a 5s timer refreshes tables,
# logs and stats; clicking a table row re-sends that opportunity's alert.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Travel Deal Hunter - Dual Estimation", fill_width=True, theme=gr.themes.Soft()) as ui:

    # Page header / description
    gr.Markdown(
        """
        <div style="text-align: center;">
        <h1 style="margin-bottom: 10px;">Travel Deal Hunter - Dual Estimation System</h1>
        <p style="color: #666; font-size: 16px;">
        Comparing LLM-based Semantic Estimation vs XGBoost Machine Learning
        </p>
        <p style="color: #999; font-size: 14px; margin-top: 10px;">
        System scans RSS feeds every 5 minutes. Use the button below to trigger a manual scan.
        </p>
        </div>
        """
    )

    # Top row: live opportunity counters plus a manual-scan trigger
    with gr.Row():
        with gr.Column(scale=3):
            stats_display = gr.Textbox(
                label="",
                value="LLM Opportunities: 0 | XGBoost Opportunities: 0",
                interactive=False,
                show_label=False,
                container=False
            )
        with gr.Column(scale=1):
            scan_button = gr.Button("Scan Now", variant="primary")

    # Side-by-side opportunity tables, one per estimator
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### LLM Estimates")
            llm_dataframe = gr.Dataframe(
                headers=["Destination", "Type", "Price", "LLM Est.", "Savings", "URL"],
                datatype=["str", "str", "str", "str", "str", "str"],
                wrap=True,
                column_widths=[2, 1, 1, 1, 1, 2],
                row_count=5,
                col_count=6,
                interactive=False
            )

        with gr.Column(scale=1):
            gr.Markdown("### XGBoost Estimates")
            xgb_dataframe = gr.Dataframe(
                headers=["Destination", "Type", "Price", "XGB Est.", "Savings", "URL"],
                datatype=["str", "str", "str", "str", "str", "str"],
                wrap=True,
                column_widths=[2, 1, 1, 1, 1, 2],
                row_count=5,
                col_count=6,
                interactive=False
            )

    # Bottom row: 3D vectorstore plot beside the live log panel
    with gr.Row():
        with gr.Column(scale=2):
            plot_output = gr.Plot(label="3D Travel Vectorstore Visualization")

        with gr.Column(scale=1):
            gr.Markdown("### Agent Activity Logs")
            log_output = gr.HTML(
                value='<div style="height: 500px; overflow-y: auto; border: 1px solid #ccc; background-color: #1a1a1a; padding: 10px; font-family: monospace; font-size: 12px; color: #fff;"></div>'
            )

    # Initial population of all widgets (including the expensive 3D plot)
    ui.load(
        fn=lambda: (
            get_llm_table(agent_framework.llm_memory),
            get_xgb_table(agent_framework.xgb_memory),
            "",
            f"LLM Opportunities: {len(agent_framework.llm_memory)} | XGBoost Opportunities: {len(agent_framework.xgb_memory)}",
            create_3d_plot()
        ),
        outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display, plot_output]
    )

    # Manual scan button
    def manual_scan():
        """Run one scan synchronously, then refresh the UI; errors are logged."""
        try:
            agent_framework.run()
            return update_ui()
        except Exception as e:
            logging.error(f"Manual scan error: {e}")
            return update_ui()

    scan_button.click(
        fn=manual_scan,
        outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display]
    )

    # Click handlers for notifications
    def llm_click_handler(selected_index: gr.SelectData):
        """Re-send the push alert for the clicked LLM opportunity row."""
        try:
            row = selected_index.index[0]
            if row < len(agent_framework.llm_memory):
                opportunity = agent_framework.llm_memory[row]
                agent_framework.messenger.alert(opportunity)
                logging.info(f"Manual alert sent for LLM opportunity: {opportunity.deal.destination}")
        except Exception as e:
            logging.error(f"Error sending LLM notification: {e}")

    def xgb_click_handler(selected_index: gr.SelectData):
        """Re-send the push alert for the clicked XGBoost opportunity row."""
        try:
            row = selected_index.index[0]
            if row < len(agent_framework.xgb_memory):
                opportunity = agent_framework.xgb_memory[row]
                agent_framework.messenger.alert(opportunity)
                logging.info(f"Manual alert sent for XGBoost opportunity: {opportunity.deal.destination}")
        except Exception as e:
            logging.error(f"Error sending XGBoost notification: {e}")

    llm_dataframe.select(fn=llm_click_handler)
    xgb_dataframe.select(fn=xgb_click_handler)

    # Poll every 5 seconds to refresh tables, logs and the stats line
    gr.Timer(5).tick(
        fn=update_ui,
        outputs=[llm_dataframe, xgb_dataframe, log_output, stats_display]
    )
|
||||
|
||||
|
||||
# Launch the Gradio app when this module is run as a script.
if __name__ == "__main__":
    ui.launch(inbrowser=True, share=False)
|
||||
|
||||
Reference in New Issue
Block a user