diff --git a/week8/community_contributions/tochi/agents/agent.py b/week8/community_contributions/tochi/agents/agent.py
new file mode 100644
index 0000000..fe09e18
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/agent.py
@@ -0,0 +1,33 @@
+import logging
+
+class Agent:
+ """
+ An abstract superclass for Agents
+ Used to log messages in a way that can identify each Agent
+ """
+
+ # Foreground colors
+ RED = '\033[31m'
+ GREEN = '\033[32m'
+ YELLOW = '\033[33m'
+ BLUE = '\033[34m'
+ MAGENTA = '\033[35m'
+ CYAN = '\033[36m'
+ WHITE = '\033[37m'
+
+ # Background color
+ BG_BLACK = '\033[40m'
+
+ # Reset code to return to default color
+ RESET = '\033[0m'
+
+ name: str = ""
+ color: str = '\033[37m'
+
+ def log(self, message):
+ """
+ Log this as an info message, identifying the agent
+ """
+ color_code = self.BG_BLACK + self.color
+ message = f"[{self.name}] {message}"
+ logging.info(color_code + message + self.RESET)
\ No newline at end of file
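
The base class above only calls `logging.info(...)`, so nothing shows up unless the root logger is configured. A minimal sketch of how a subclass might surface these agent-tagged, colored log lines (the `basicConfig` call and the `DemoAgent` class are illustrative assumptions, not part of this contribution; it assumes you run from the contribution's root folder so `agents` is importable):

```python
import logging

from agents.agent import Agent

# Show INFO-level messages so Agent.log output is visible in the console
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

class DemoAgent(Agent):
    name = "Demo Agent"
    color = Agent.CYAN

DemoAgent().log("Hello from an agent-tagged log line")
```
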
diff --git a/week8/community_contributions/tochi/agents/deals.py b/week8/community_contributions/tochi/agents/deals.py
new file mode 100644
index 0000000..fc24e76
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/deals.py
@@ -0,0 +1,233 @@
+import os
+from dotenv import load_dotenv
+from pydantic import BaseModel
+from typing import List, Dict, Self
+import feedparser
+from tqdm import tqdm
+import time
+from openai import OpenAI
+from typing import Optional
+import json
+
+
+load_dotenv(override=True)
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "your-key-if-not-using-env")
+
+openai = OpenAI()
+
+feeds = [
+ "https://www.dealnews.com/c142/Electronics/?rss=1",
+ "https://www.dealnews.com/c39/Computers/?rss=1",
+ "https://www.dealnews.com/c238/Automotive/?rss=1",
+ "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
+ "https://www.dealnews.com/c196/Home-Garden/?rss=1",
+ "https://www.reddit.com/r/buildapcsales.rss",
+ "https://www.reddit.com/r/deals.rss",
+]
+
+SYSTEM_PROMPT = """
+You are an RSS feed parser specializing in extracting deal information. Your task is to analyze content and extract structured data.
+
+# INPUT TYPES
+You will receive one of two input types:
+
+**TYPE 1: RSS Feed Entry Data**
+- May contain fields like: title, summary, description, link
+- Summary/description often contains HTML with deal details
+- Multiple URL fields may exist (link, links array, etc.)
+
+**TYPE 2: HTML Page Content**
+- Raw HTML from a deal webpage
+- Contains product information, pricing, and purchase links
+
+# TASK
+Extract and structure the following information:
+1. **title**: The deal's headline or main title
+ - For RSS entries: Use the entry's title field directly
+ - For HTML: Extract the main product/deal title
+
+2. **summary**: A concise summary of the deal (2-3 sentences max), focusing on:
+ - What is being offered (product name, specs)
+ - Key terms (price, discount percentage, original price)
+ - Important conditions (promo codes, shipping, availability, refurb/new condition)
+ - Strip ALL HTML tags and formatting
+
+3. **url**: The primary link where users can access the deal
+ - Prioritize direct product/deal purchase links
+ - Avoid tracking links, RSS links with "?rss=1" or "?iref=rss"
+ - For RSS entries, use the "link" field or first link in "links" array
+
+# EXTRACTION RULES
+- **From RSS entries**: Parse the 'summary' or 'description' HTML to extract deal details
+- **Clean all HTML**: Remove all HTML tags and markup; keep plain text only
+
+# OUTPUT FORMAT
+Respond with a single JSON object containing exactly the keys "title", "summary" and "url", and no other text.
+
+# EXAMPLE
+**Input** (RSS entry):
+```
+title: "Sony Headphones for $99 + free shipping"
+summary: "Was $199, now $99. Use code SAVE50."
+link: "https://example.com/deal?iref=rss-c142"
+```
+
+**Output**:
+```json
+{
+    "title": "Sony Headphones for $99 + free shipping",
+    "summary": "Sony Headphones originally priced at $199, now available for $99 with free shipping. Use promo code SAVE50 at checkout.",
+    "url": "https://example.com/deal"
+}
+```
+"""
+
+
+def gpt_parse(soup: str) -> Optional[Dict[str, str]]:
+    """
+    Parse RSS feed content using GPT to extract title, summary, and URL.
+
+    Args:
+        soup: Raw RSS feed content (HTML/text)
+
+    Returns:
+        Dictionary with title, summary, url keys or None if parsing fails
+    """
+
+    text_to_summarize = soup
+    if not text_to_summarize:
+        return None
+
+    try:
+        response = openai.chat.completions.create(
+            model="gpt-4o-mini",
+            temperature=0.2,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": text_to_summarize},
+            ],
+        )
+        res_text = response.choices[0].message.content
+        parsed_data = json.loads(res_text)
+
+        if all(
+            key in parsed_data and parsed_data[key]
+            for key in ["title", "summary", "url"]
+        ):
+            return {
+                "title": parsed_data["title"],
+                "summary": parsed_data["summary"],
+                "url": parsed_data["url"],
+            }
+        else:
+            print(f"Missing or empty required fields in response: {parsed_data}")
+            return None
+
+    except json.JSONDecodeError as e:
+        print(f"Error parsing JSON from OpenAI response: {e}")
+        return None
+    except Exception as e:
+        print(f"Error calling OpenAI: {e}")
+        return None
+
+
+class ScrapedDeal:
+    """
+    A class to represent a Deal retrieved from an RSS feed
+    """
+
+    category: str
+    title: str
+    summary: str
+    url: str
+    details: str
+    features: str
+
+    def __init__(self, entry: Dict[str, str]):
+        """
+        Populate this instance based on the provided dict
+        """
+
+        self.title = entry["title"]
+        self.summary = entry["summary"]
+        self.url = entry["url"]
+        self.details = self.summary
+        self.features = ""
+
+    def __repr__(self):
+        """
+        Return a string to describe this deal
+        """
+        return f"<{self.title}>"
+
+    def describe(self):
+        """
+        Return a longer string to describe this deal for use in calling a model
+        """
+        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"
+
+    @classmethod
+    def fetch(cls, show_progress: bool = False) -> List[Self]:
+        """
+        Retrieve all deals from the selected RSS feeds
+        """
+        deals = []
+        skipped = 0
+
+        feed_iter = tqdm(feeds) if show_progress else feeds
+        for feed_url in feed_iter:
+            feed = feedparser.parse(feed_url)
+            for entry in feed.entries[:10]:
+                try:
+                    parsed_deal = gpt_parse(json.dumps(entry))
+                    if parsed_deal:
+                        deals.append(cls(parsed_deal))
+                    else:
+                        skipped += 1
+                    time.sleep(0.5)
+                except Exception as e:
+                    skipped += 1
+                    print(f"Skipping deal: {str(e)}")
+                    continue
+
+        print(f"Fetched {len(deals)} deals successfully, skipped {skipped}")
+        return deals
+
+
+class Deal(BaseModel):
+    """
+    A class to represent a Deal with a summary description
+    """
+
+    product_description: str
+    price: float
+    url: str
+
+
+class DealSelection(BaseModel):
+    """
+    A class to represent a list of Deals
+    """
+
+    deals: List[Deal]
+
+
+class Opportunity(BaseModel):
+    """
+    A class to represent a possible opportunity: a Deal where we estimate
+    it should cost more than it's being offered
+    """
+
+    deal: Deal
+    estimate: float
+    discount: float
+
diff --git a/week8/community_contributions/tochi/agents/ensemble_agent.py b/week8/community_contributions/tochi/agents/ensemble_agent.py
new file mode 100644
index 0000000..141a3e4
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/ensemble_agent.py
@@ -0,0 +1,48 @@
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+import joblib
+
+from agents.agent import Agent
+from agents.specialist_agent import SpecialistAgent
+from agents.frontier_agent import FrontierAgent
+from agents.random_forest_agent import RandomForestAgent
+
+class EnsembleAgent(Agent):
+
+    name = "Ensemble Agent"
+    color = Agent.YELLOW
+
+    def __init__(self, collection):
+        """
+        Create an instance of Ensemble, by creating each of the models
+        And loading the weights of the Ensemble
+        """
+        self.log("Initializing Ensemble Agent")
+        self.specialist = SpecialistAgent()
+        self.frontier = FrontierAgent(collection)
+        self.random_forest = RandomForestAgent()
+        self.model = joblib.load('ensemble_model.pkl')
+        self.log("Ensemble Agent is ready")
+
+    def price(self, description: str) -> float:
+        """
+        Run this ensemble model
+        Ask each of the models to price the product
+        Then use the Linear Regression model to return the weighted price
+        :param description: the description of a product
+        :return: an estimate of its price
+        """
+        self.log("Running Ensemble Agent - collaborating with specialist, frontier and random forest agents")
+        specialist = self.specialist.price(description)
+        frontier = self.frontier.price(description)
+        random_forest = self.random_forest.price(description)
+        X = pd.DataFrame({
+            'Specialist': [specialist],
+            'Frontier': [frontier],
+            'RandomForest': [random_forest],
+            'Min': [min(specialist, frontier, random_forest)],
+            'Max': [max(specialist, frontier, random_forest)],
+        })
+        y = max(0, self.model.predict(X)[0])
+        self.log(f"Ensemble Agent complete - returning ${y:.2f}")
+        return y
\ No newline at end of file
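
Note that `ensemble_model.pkl` is not created by this class; the accompanying notebook fits it as a plain linear regression over the three agents' predictions plus their min and max. A condensed, self-contained sketch of that training step (the numeric values here are dummy placeholders; in the notebook they come from running the three agents over test items, and the column names must match the DataFrame built in `price()`):

```python
import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression

# Dummy predictions from the Specialist, Frontier and Random Forest agents,
# plus the true prices; the notebook gathers these from real test items.
specialists = [120.0, 45.0, 310.0]
frontiers = [110.0, 50.0, 290.0]
random_forests = [95.0, 60.0, 250.0]
prices = [115.0, 48.0, 300.0]

X = pd.DataFrame({
    "Specialist": specialists,
    "Frontier": frontiers,
    "RandomForest": random_forests,
    "Min": [min(s, f, r) for s, f, r in zip(specialists, frontiers, random_forests)],
    "Max": [max(s, f, r) for s, f, r in zip(specialists, frontiers, random_forests)],
})
y = pd.Series(prices)

lr = LinearRegression().fit(X, y)
joblib.dump(lr, "ensemble_model.pkl")
```
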
diff --git a/week8/community_contributions/tochi/agents/frontier_agent.py b/week8/community_contributions/tochi/agents/frontier_agent.py
new file mode 100644
index 0000000..88e7fd4
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/frontier_agent.py
@@ -0,0 +1,113 @@
+# imports
+
+import os
+import re
+import math
+import json
+from typing import List, Dict
+from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+from datasets import load_dataset
+import chromadb
+from items import Item
+from testing import Tester
+from agents.agent import Agent
+
+
+class FrontierAgent(Agent):
+
+    name = "Frontier Agent"
+    color = Agent.BLUE
+
+    MODEL = "gpt-4o-mini"
+
+    def __init__(self, collection):
+        """
+        Set up this instance by connecting to OpenAI or DeepSeek, to the Chroma Datastore,
+        And setting up the vector encoding model
+        """
+        self.log("Initializing Frontier Agent")
+        deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
+        if deepseek_api_key:
+            self.client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
+            self.MODEL = "deepseek-chat"
+            self.log("Frontier Agent is set up with DeepSeek")
+        else:
+            self.client = OpenAI()
+            self.MODEL = "gpt-4o-mini"
+            self.log("Frontier Agent is setting up with OpenAI")
+        self.collection = collection
+        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+        self.log("Frontier Agent is ready")
+
+    def make_context(self, similars: List[str], prices: List[float]) -> str:
+        """
+        Create context that can be inserted into the prompt
+        :param similars: similar products to the one being estimated
+        :param prices: prices of the similar products
+        :return: text to insert in the prompt that provides context
+        """
+        message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
+        for similar, price in zip(similars, prices):
+            message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
+        return message
+
+    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
+        """
+        Create the message list to be included in a call to OpenAI
+        With the system and user prompt
+        :param description: a description of the product
+        :param similars: similar products to this one
+        :param prices: prices of similar products
+        :return: the list of messages in the format expected by OpenAI
+        """
+        system_message = "You estimate prices of items. Reply only with the price, no explanation"
+        user_prompt = self.make_context(similars, prices)
+        user_prompt += "And now the question for you:\n\n"
+        user_prompt += "How much does this cost?\n\n" + description
+        return [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_prompt},
+            {"role": "assistant", "content": "Price is $"}
+        ]
+
+    def find_similars(self, description: str):
+        """
+        Return a list of items similar to the given one by looking in the Chroma datastore
+        """
+        self.log("Frontier Agent is performing a RAG search of the Chroma datastore to find 5 similar products")
+        vector = self.model.encode([description])
+        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
+        documents = results['documents'][0][:]
+        prices = [m['price'] for m in results['metadatas'][0][:]]
+        self.log("Frontier Agent has found similar products")
+        return documents, prices
+
+    def get_price(self, s) -> float:
+        """
+        A utility that plucks a floating point number out of a string
+        """
+        s = s.replace('$','').replace(',','')
+        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
+        return float(match.group()) if match else 0.0
+
+    def price(self, description: str) -> float:
+        """
+        Make a call to OpenAI or DeepSeek to estimate the price of the described product,
+        by looking up 5 similar products and including them in the prompt to give context
+        :param description: a description of the product
+        :return: an estimate of the price
+        """
+        documents, prices = self.find_similars(description)
+        self.log(f"Frontier Agent is about to call {self.MODEL} with context including 5 similar products")
+        response = self.client.chat.completions.create(
+            model=self.MODEL,
+            messages=self.messages_for(description, documents, prices),
+            seed=42,
+            max_tokens=5
+        )
+        reply = response.choices[0].message.content
+        result = self.get_price(reply)
+        self.log(f"Frontier Agent completed - predicting ${result:.2f}")
+        return result
+
\ No newline at end of file
diff --git a/week8/community_contributions/tochi/agents/messaging_agent.py b/week8/community_contributions/tochi/agents/messaging_agent.py
new file mode 100644
index 0000000..7494703
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/messaging_agent.py
@@ -0,0 +1,79 @@
+import os
+# from twilio.rest import Client
+from agents.deals import Opportunity
+import http.client
+import urllib.parse
+from agents.agent import Agent
+
+# Uncomment the Twilio lines if you wish to use Twilio
+
+DO_TEXT = False
+DO_PUSH = True
+
+class MessagingAgent(Agent):
+
+    name = "Messaging Agent"
+    color = Agent.WHITE
+
+    def __init__(self):
+        """
+        Set up this object to either do push notifications via Pushover,
+        or SMS via Twilio,
+        whichever is specified in the constants
+        """
+        self.log("Messaging Agent is initializing")
+        if DO_TEXT:
+            account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
+            auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
+            self.me_from = os.getenv('TWILIO_FROM', 'your-phone-number-if-not-using-env')
+            self.me_to = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
+            # self.client = Client(account_sid, auth_token)
+            self.log("Messaging Agent has initialized Twilio")
+        if DO_PUSH:
+            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
+            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-token-if-not-using-env')
+            self.log("Messaging Agent has initialized Pushover")
+
+    def message(self, text):
+        """
+        Send an SMS message using the Twilio API
+        """
+        self.log("Messaging Agent is sending a text message")
+        message = self.client.messages.create(
+            from_=self.me_from,
+            body=text,
+            to=self.me_to
+        )
+
+    def push(self, text):
+        """
+        Send a Push Notification using the Pushover API
+        """
+        self.log("Messaging Agent is sending a push notification")
+        conn = http.client.HTTPSConnection("api.pushover.net:443")
+        conn.request("POST", "/1/messages.json",
+                     urllib.parse.urlencode({
+                         "token": self.pushover_token,
+                         "user": self.pushover_user,
+                         "message": text,
+                         "sound": "cashregister"
+                     }), { "Content-type": "application/x-www-form-urlencoded" })
+        conn.getresponse()
+
+    def alert(self, opportunity: Opportunity):
+        """
+        Make an alert about the specified Opportunity
+        """
+        text = f"Deal Alert! Price=${opportunity.deal.price:.2f}, "
+        text += f"Estimate=${opportunity.estimate:.2f}, "
+        text += f"Discount=${opportunity.discount:.2f} :"
+        text += opportunity.deal.product_description[:10]+'... '
+        text += opportunity.deal.url
+        if DO_TEXT:
+            self.message(text)
+        if DO_PUSH:
+            self.push(text)
+        self.log("Messaging Agent has completed")
+
+
\ No newline at end of file
diff --git a/week8/community_contributions/tochi/agents/planning_agent.py b/week8/community_contributions/tochi/agents/planning_agent.py
new file mode 100644
index 0000000..547536a
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/planning_agent.py
@@ -0,0 +1,57 @@
+from typing import Optional, List
+from agents.agent import Agent
+from agents.deals import ScrapedDeal, DealSelection, Deal, Opportunity
+from agents.scanner_agent import ScannerAgent
+from agents.ensemble_agent import EnsembleAgent
+from agents.messaging_agent import MessagingAgent
+
+
+class PlanningAgent(Agent):
+
+    name = "Planning Agent"
+    color = Agent.GREEN
+    DEAL_THRESHOLD = 50
+
+    def __init__(self, collection):
+        """
+        Create instances of the 3 Agents that this planner coordinates across
+        """
+        self.log("Planning Agent is initializing")
+        self.scanner = ScannerAgent()
+        self.ensemble = EnsembleAgent(collection)
+        self.messenger = MessagingAgent()
+        self.log("Planning Agent is ready")
+
+    def run(self, deal: Deal) -> Opportunity:
+        """
+        Run the workflow for a particular deal
+        :param deal: the deal, summarized from an RSS scrape
+        :returns: an opportunity including the discount
+        """
+        self.log("Planning Agent is pricing up a potential deal")
+        estimate = self.ensemble.price(deal.product_description)
+        discount = estimate - deal.price
+        self.log(f"Planning Agent has processed a deal with discount ${discount:.2f}")
+        return Opportunity(deal=deal, estimate=estimate, discount=discount)
+
+    def plan(self, memory: List[str] = []) -> Optional[Opportunity]:
+        """
+        Run the full workflow:
+        1. Use the ScannerAgent to find deals from RSS feeds
+        2. Use the EnsembleAgent to estimate them
+        3. Use the MessagingAgent to send a notification of deals
+        :param memory: a list of URLs that have been surfaced in the past
+        :return: an Opportunity if one was surfaced, otherwise None
+        """
+        self.log("Planning Agent is kicking off a run")
+        selection = self.scanner.scan(memory=memory)
+        if selection:
+            opportunities = [self.run(deal) for deal in selection.deals[:5]]
+            opportunities.sort(key=lambda opp: opp.discount, reverse=True)
+            best = opportunities[0]
+            self.log(f"Planning Agent has identified the best deal has discount ${best.discount:.2f}")
+            if best.discount > self.DEAL_THRESHOLD:
+                self.messenger.alert(best)
+            self.log("Planning Agent has completed a run")
+            return best if best.discount > self.DEAL_THRESHOLD else None
+        return None
\ No newline at end of file
diff --git a/week8/community_contributions/tochi/agents/random_forest_agent.py b/week8/community_contributions/tochi/agents/random_forest_agent.py
new file mode 100644
index 0000000..bfe9715
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/random_forest_agent.py
@@ -0,0 +1,37 @@
+# imports
+
+import os
+import re
+from typing import List
+from sentence_transformers import SentenceTransformer
+import joblib
+from agents.agent import Agent
+
+
+
+class RandomForestAgent(Agent):
+
+    name = "Random Forest Agent"
+    color = Agent.MAGENTA
+
+    def __init__(self):
+        """
+        Initialize this object by loading in the saved model weights
+        and the SentenceTransformer vector encoding model
+        """
+        self.log("Random Forest Agent is initializing")
+        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+        self.model = joblib.load('random_forest_model.pkl')
+        self.log("Random Forest Agent is ready")
+
+    def price(self, description: str) -> float:
+        """
+        Use a Random Forest model to estimate the price of the described item
+        :param description: the product to be estimated
+        :return: the price as a float
+        """
+        self.log("Random Forest Agent is starting a prediction")
+        vector = self.vectorizer.encode([description])
+        result = max(0, self.model.predict(vector)[0])
+        self.log(f"Random Forest Agent completed - predicting ${result:.2f}")
+        return result
\ No newline at end of file
diff --git a/week8/community_contributions/tochi/agents/scanner_agent.py b/week8/community_contributions/tochi/agents/scanner_agent.py
new file mode 100644
index 0000000..8dc6674
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/scanner_agent.py
@@ -0,0 +1,94 @@
+import os
+import json
+from typing import Optional, List
+from openai import OpenAI
+from agents.deals import ScrapedDeal, DealSelection
+from agents.agent import Agent
+
+
+class ScannerAgent(Agent):
+
+    MODEL = "gpt-4o-mini"
+
+    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
+    Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
+    Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
+    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.
+
+    {"deals": [
+        {
+            "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragraph of text for each item you choose.",
+            "price": 99.99,
+            "url": "the url as provided"
+        },
+        ...
+    ]}"""
+
+    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price that is greater than 0.
+    Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
+    Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
+    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.
+
+    Deals:
+
+    """
+
+    USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON and include exactly 5 deals, no more."
+
+    name = "Scanner Agent"
+    color = Agent.CYAN
+
+    def __init__(self):
+        """
+        Set up this instance by initializing OpenAI
+        """
+        self.log("Scanner Agent is initializing")
+        self.openai = OpenAI()
+        self.log("Scanner Agent is ready")
+
+    def fetch_deals(self, memory) -> List[ScrapedDeal]:
+        """
+        Look up deals published on RSS feeds
+        Return any new deals that are not already in the memory provided
+        """
+        self.log("Scanner Agent is about to fetch deals from RSS feed")
+        urls = [opp.deal.url for opp in memory]
+        scraped = ScrapedDeal.fetch()
+        result = [scrape for scrape in scraped if scrape.url not in urls]
+        self.log(f"Scanner Agent received {len(result)} deals not already scraped")
+        return result
+
+    def make_user_prompt(self, scraped) -> str:
+        """
+        Create a user prompt for OpenAI based on the scraped deals provided
+        """
+        user_prompt = self.USER_PROMPT_PREFIX
+        user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
+        user_prompt += self.USER_PROMPT_SUFFIX
+        return user_prompt
+
+    def scan(self, memory: List[str]=[]) -> Optional[DealSelection]:
+        """
+        Call OpenAI to provide a high potential list of deals with good descriptions and prices
+        Use StructuredOutputs to ensure it conforms to our specifications
+        :param memory: a list of URLs representing deals already raised
+        :return: a selection of good deals, or None if there aren't any
+        """
+        scraped = self.fetch_deals(memory)
+        if scraped:
+            user_prompt = self.make_user_prompt(scraped)
+            self.log("Scanner Agent is calling OpenAI using Structured Output")
+            result = self.openai.beta.chat.completions.parse(
+                model=self.MODEL,
+                messages=[
+                    {"role": "system", "content": self.SYSTEM_PROMPT},
+                    {"role": "user", "content": user_prompt}
+                ],
+                response_format=DealSelection
+            )
+            result = result.choices[0].message.parsed
+            result.deals = [deal for deal in result.deals if deal.price>0]
+            self.log(f"Scanner Agent received {len(result.deals)} selected deals with price>0 from OpenAI")
+            return result
+        return None
+
diff --git a/week8/community_contributions/tochi/agents/specialist_agent.py b/week8/community_contributions/tochi/agents/specialist_agent.py
new file mode 100644
index 0000000..1bab0d5
--- /dev/null
+++ b/week8/community_contributions/tochi/agents/specialist_agent.py
@@ -0,0 +1,29 @@
+import modal
+from agents.agent import Agent
+
+
+class SpecialistAgent(Agent):
+    """
+    An Agent that runs our fine-tuned LLM that's running remotely on Modal
+    """
+
+    name = "Specialist Agent"
+    color = Agent.RED
+
+    def __init__(self):
+        """
+        Set up this Agent by creating an instance of the modal class
+        """
+        self.log("Specialist Agent is initializing - connecting to modal")
+        Pricer = modal.Cls.from_name("pricer-service", "Pricer")
+        self.pricer = Pricer()
+        self.log("Specialist Agent is ready")
+
+    def price(self, description: str) -> float:
+        """
+        Make a remote call to return the estimate of the price of this item
+        """
+        self.log("Specialist Agent is calling remote fine-tuned model")
+        result = self.pricer.price.remote(description)
+        self.log(f"Specialist Agent completed - predicting ${result:.2f}")
+        return result
diff --git a/week8/community_contributions/tochi/autonomous_deal_agent.ipynb b/week8/community_contributions/tochi/autonomous_deal_agent.ipynb
new file mode 100644
index 0000000..415d4db
--- /dev/null
+++ b/week8/community_contributions/tochi/autonomous_deal_agent.ipynb
@@ -0,0 +1,1262 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d5e9f9d1",
+   "metadata": {},
+   "source": [
+    "# Autonomous Deal Agent\n",
+    "\n",
+    "An intelligent system that automatically discovers, evaluates, and surfaces the best deals from across the web using AI-powered price estimation and web scraping.\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "This project builds an autonomous agent that monitors RSS feeds and online sources for deals, evaluates them using ensemble ML models, and delivers personalized deal notifications to users. The system combines multiple AI technologies including fine-tuned pricing models, GPT-4 for parsing, RAG for context, and intelligent web scraping.\n",
+    "\n",
+    "## Key Features\n",
+    "\n",
+    "### 🤖 AI-Powered Price Estimation\n",
+    "- **Fine-tuned Specialist Pricer Model**: Custom-trained model deployed on Modal for accurate price predictions\n",
+    "- **Ensemble Architecture**: Multiple specialist models work together to determine fair market value\n",
+    "- **GPT-4 Frontier with RAG**: Advanced context-aware pricing using Retrieval-Augmented Generation\n",
+    "- **Price Comparison**: Automatically compares deal prices against estimated market value to identify true bargains\n",
+    "\n",
+    "### 🕷️ Intelligent Web Scraping\n",
+    "- **Multi-Source Aggregation**: Scrapes deals from RSS feeds and Reddit\n",
+    "- **AI-Powered Parser**: Uses frontier AI models (GPT-4) to intelligently parse and extract deal information from various website structures\n",
+    "- **Adaptive Scraping**: Handles different site formats and layouts automatically\n",
+    "- **Data Enrichment**: Extracts product details, pricing, features, and purchase links\n",
+    "\n",
+    "### 💎 Deal Discovery & Analysis\n",
+    "- **Automated Deal Scanning**: Continuously monitors configured sources for new deals\n",
+    "- **Opportunity Detection**: Identifies deals where market price significantly exceeds offer price\n",
+    "- **Deal Scoring**: Ranks deals by discount percentage and estimated value\n",
+    "- **Memory System**: Tracks previously surfaced deals to avoid duplicates\n",
+    "\n",
+    "### 🖥️ User Interface\n",
+    "- **Gradio Web UI**: Clean, intuitive interface for browsing discovered deals\n",
+    "- **Deal Details**: Displays product description, pricing, estimated value, and discount percentage\n",
+    "- **Direct Purchase Links**: One-click access to deal pages\n",
+    "\n",
+    "### 📲 Push Notifications\n",
+    "- **Real-Time Alerts**: Instant notifications when high-value deals are discovered\n",
+    "- **Push Notification Integration**: Uses Pushover/similar service for mobile alerts\n",
+    "- **Customizable Thresholds**: Configure minimum discount percentage for notifications\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac199135",
+   "metadata": {},
+   "source": [
+    "### Project Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93f3b7c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard library imports\n",
+    "import os\n",
+    "import re\n",
+    "import math\n",
+    "import json\n",
+    "import random\n",
+    "import pickle\n",
+    "\n",
+    "# Third-party imports\n",
+    "from dotenv import load_dotenv\n",
+    "from huggingface_hub import login\n",
+    "from tqdm import tqdm\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import joblib\n",
+    "from openai import OpenAI\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "from datasets import load_dataset\n",
+    "import chromadb\n",
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import mean_squared_error, r2_score\n",
+    "from sklearn.manifold import TSNE\n",
+    "import plotly.graph_objects as go\n",
+    "import modal\n",
+    "import gradio as gr\n",
+    "\n",
+    "# Local imports\n",
+    "from pricer_ephemeral import app, price\n",
+    "from items import Item\n",
+    "from testing import Tester\n",
+    "from agents.deals import ScrapedDeal, DealSelection, Opportunity, Deal\n",
+    "from agents.messaging_agent import MessagingAgent\n",
+    "from deal_agent_framework import DealAgentFramework\n",
+    "from agents.planning_agent import PlanningAgent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "069dd1ca",
+   "metadata": {},
+   "source": [
+    "### Loading environment variables for OpenAI and Hugging Face"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "446b3028",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_dotenv(override=True)\n",
+    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
+    "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
+    "DB = \"products_vectorstore\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17db18f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hf_token = os.environ['HF_TOKEN']\n",
+    "login(hf_token, add_to_git_credential=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a29d6a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from items import Item"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1d1f75b1",
+   "metadata": {},
+   "source": [
+    "### Setting up Modal to deploy the Pricer service"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a47784fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with modal.enable_output():\n",
+    "    with app.run():\n",
+    "        result = price.remote(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")\n",
+    "result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ac2263f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!modal deploy -m pricer_service"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7daae5f9",
+   "metadata": {},
+   "source": [
+    "### Setting up RAG to provide relevant price context to GPT - to improve accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90cd03a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('train.pkl', 'rb') as file:\n",
+    "    train = pickle.load(file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58a3e5a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = chromadb.PersistentClient(path=DB)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b5910ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection_name = \"products\"\n",
+    "\n",
+    "existing_collection_names = client.list_collections()\n",
+    "\n",
+    "if collection_name in existing_collection_names:\n",
+    "    client.delete_collection(collection_name)\n",
+    "    print(f\"Deleted existing collection: {collection_name}\")\n",
+    "collection = client.create_collection(collection_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8b34e0c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9fb07b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pass in a list of texts, get back a numpy array of vectors\n",
+    "\n",
+    "vector = model.encode([\"Well hi there\"])[0]\n",
+    "vector"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6fdab9af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def description(item):\n",
+    "    text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
+    "    return text.split(\"\\n\\nPrice is $\")[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a9e00e0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "description(train[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2485a02d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NUMBER_OF_DOCUMENTS = len(train)\n",
+    "\n",
+    "for i in tqdm(range(0, NUMBER_OF_DOCUMENTS, 1000)):\n",
+    "    documents = [description(item) for item in train[i: i+1000]]\n",
+    "    vectors = model.encode(documents).astype(float).tolist()\n",
+    "    metadatas = [{\"category\": item.category, \"price\": item.price} for item in train[i: i+1000]]\n",
+    "    ids = [f\"doc_{j}\" for j in range(i, i+len(documents))]\n",
+    "    collection.add(\n",
+    "        ids=ids,\n",
+    "        documents=documents,\n",
+    "        embeddings=vectors,\n",
+    "        metadatas=metadatas\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b16d8ebc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MAXIMUM_DATAPOINTS = 30_000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "119197e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DB = \"products_vectorstore\"\n",
+    "client = chromadb.PersistentClient(path=DB)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9de6312",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection = client.get_or_create_collection('products')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f467e164",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']\n",
+    "COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7196f11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prework\n",
+    "result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=MAXIMUM_DATAPOINTS)\n",
+    "vectors = np.array(result['embeddings'])\n",
+    "documents = result['documents']\n",
+    "categories = [metadata['category'] for metadata in result['metadatas']]\n",
+    "colors = [COLORS[CATEGORIES.index(c)] for c in categories]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "559f8a0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MAXIMUM_DATAPOINTS = 20_000\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5074217",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DB = \"products_vectorstore\"\n",
+    "client = chromadb.PersistentClient(path=DB)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45142753",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "collection = client.get_or_create_collection('products')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f74b7e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']\n",
+    "COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24ec13d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prework\n",
+    "result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=MAXIMUM_DATAPOINTS)\n",
+    "vectors = np.array(result['embeddings'])\n",
+    "documents = result['documents']\n",
+    "categories = [metadata['category'] for metadata in result['metadatas']]\n",
+    "colors = [COLORS[CATEGORIES.index(c)] for c in categories]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b7776fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's try 3D!\n",
+    "\n",
+    "tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)\n",
+    "reduced_vectors = tsne.fit_transform(vectors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d60238b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Create the 3D scatter plot\n",
+    "fig = go.Figure(data=[go.Scatter3d(\n",
+    "    x=reduced_vectors[:, 0],\n",
+    "    y=reduced_vectors[:, 1],\n",
+    "    z=reduced_vectors[:, 2],\n",
+    "    mode='markers',\n",
+    "    marker=dict(size=3, color=colors, opacity=0.7),\n",
+    ")])\n",
+    "\n",
+    "fig.update_layout(\n",
+    "    title='3D Chroma Vector Store Visualization',\n",
+    "    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),\n",
+    "    width=1200,\n",
+    "    height=800,\n",
+    "    margin=dict(r=20, b=10, l=10, t=40)\n",
+    ")\n",
+    "\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8919e2ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "610b78ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('test.pkl', 'rb') as file:\n",
+    "    test = pickle.load(file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2828141b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_context(similars, prices):\n",
+    "    message = \"To provide some context, here are some other items that might be similar to the item you need to estimate.\\n\\n\"\n",
+    "    for similar, price in zip(similars, prices):\n",
+    "        message += f\"Potentially related product:\\n{similar}\\nPrice is ${price:.2f}\\n\\n\"\n",
+    "    return message"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "495e100c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def messages_for(item, similars, prices):\n",
+    "    system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
+    "    user_prompt = make_context(similars, prices)\n",
+    "    user_prompt += \"And now the question for you:\\n\\n\"\n",
+    "    user_prompt += item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24425574",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DB = \"products_vectorstore\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47de76ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = chromadb.PersistentClient(path=DB)\n",
+    "collection = client.get_or_create_collection('products')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "820fc5b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def description(item):\n",
+    "    text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
+    "    return text.split(\"\\n\\nPrice is $\")[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4b75b45",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "description(test[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32ec6e0a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5ea1b74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def vector(item):\n",
+    "    return model.encode([description(item)])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1bf5e72c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def find_similars(item):\n",
+    "    results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)\n",
+    "    documents = results['documents'][0][:]\n",
+    "    prices = [m['price'] for m in results['metadatas'][0][:]]\n",
+    "    return documents, prices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "68618b73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(make_context(documents, prices))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91539724",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(messages_for(test[1], documents, prices))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82b56f8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_price(s):\n",
+    "    s = s.replace('$','').replace(',','')\n",
+    "    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
+    "    return float(match.group()) if match else 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "78ae2c5d",
+   "metadata": {},
+   "source": [
+    "### GPT-4o Mini + RAG"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82f0043a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The function for gpt-4o-mini\n",
+    "\n",
+    "def gpt_4o_mini_rag(item):\n",
+    "    documents, prices = find_similars(item)\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\", \n",
+    "        messages=messages_for(item, documents, prices),\n",
+    "        seed=42,\n",
+    "        max_tokens=5\n",
+    "    )\n",
+    "    reply = response.choices[0].message.content\n",
+    "    return get_price(reply)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "650356ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gpt_4o_mini_rag(test[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d65cf118",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test[1].price"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7d31909a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Tester.test(gpt_4o_mini_rag, test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c932850",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.frontier_agent import FrontierAgent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "09217080",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent = FrontierAgent(collection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f52e5c1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = chromadb.PersistentClient(path=DB)\n",
+    "collection = client.get_or_create_collection('products')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cff455b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
+    "vectors = np.array(result['embeddings'])\n",
+    "documents = result['documents']\n",
+    "prices = [metadata['price'] for metadata in result['metadatas']]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18159c68",
+   "metadata": {},
+   "source": [
+    "### Training a Random Forest model for product price prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ae7ad094",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)\n",
+    "rf_model.fit(vectors, prices)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1b452b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "joblib.dump(rf_model, 'random_forest_model.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e433a2cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rf_model = joblib.load('random_forest_model.pkl')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed4ad6e5",
+   "metadata": {},
+   "source": [
+    "### Price prediction agents - Random Forest, Specialist, and Frontier (with RAG)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f89f1bff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.specialist_agent import SpecialistAgent\n",
+    "from agents.frontier_agent import FrontierAgent\n",
+    "from agents.random_forest_agent import RandomForestAgent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3e9b35f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "specialist = SpecialistAgent()\n",
+    "frontier = FrontierAgent(collection)\n",
+    "random_forest = RandomForestAgent()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c31663e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def description(item):\n",
+    "    return item.prompt.split(\"to the nearest dollar?\\n\\n\")[1].split(\"\\n\\nPrice is $\")[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52eadc3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rf(item):\n",
+    "    return random_forest.price(description(item))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74e75cd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Tester.test(rf, test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "738810cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "product = \"Quadcast HyperX condenser mic for high quality audio for podcasting\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf8d8c5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(specialist.price(product))\n",
+    "print(frontier.price(product))\n",
+    "print(random_forest.price(product))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5790f240",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "specialists = []\n",
+    "frontiers = []\n",
+    "random_forests = []\n",
+    "prices = []\n",
+    "for item in tqdm(test[1000:1250]):\n",
+    "    text = description(item)\n",
+    "    specialists.append(specialist.price(text))\n",
+    "    frontiers.append(frontier.price(text))\n",
+    "    random_forests.append(random_forest.price(text))\n",
+    "    prices.append(item.price)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e9676de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mins = [min(s,f,r) for s,f,r in zip(specialists, frontiers, random_forests)]\n",
+    "maxes = [max(s,f,r) for s,f,r in zip(specialists, frontiers, random_forests)]\n",
+    "\n",
+    "X = pd.DataFrame({\n",
+    "    'Specialist': specialists,\n",
+    "    'Frontier': frontiers,\n",
+    "    'RandomForest': random_forests,\n",
+    "    'Min': mins,\n",
+    "    'Max': maxes,\n",
+    "})\n",
+    "\n",
+    "# Convert y to a Series\n",
+    "y = pd.Series(prices)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fac5cde1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train a Linear Regression\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "lr = LinearRegression()\n",
+    "lr.fit(X, y)\n",
+    "\n",
+    "feature_columns = X.columns.tolist()\n",
+    "\n",
+    "for feature, coef in zip(feature_columns, lr.coef_):\n",
+    "    print(f\"{feature}: {coef:.2f}\")\n",
+    "print(f\"Intercept={lr.intercept_:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5a36b10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "joblib.dump(lr, 'ensemble_model.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3dd2f5d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.ensemble_agent import EnsembleAgent\n",
+    "ensemble = EnsembleAgent(collection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e12fe631",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ensemble.price(product)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf48356b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ensemble_pricer(item):\n",
+    "    return max(0,ensemble.price(description(item)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c26bfc9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Tester.test(ensemble_pricer, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5b51c5ab",
+   "metadata": {},
+   "source": [
+    "### The Scraper Agent - fetches deals from websites and formats them for the models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "359d6b41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deals = ScrapedDeal.fetch(show_progress=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fce72d69",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(deals)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5f1a724d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "deals[0].describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4fda93c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system_prompt = \"\"\"You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.\n",
+    "Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.\n",
+    "Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.\n",
+    "Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
+    "\n",
+    "{\"deals\": [\n",
+    "    {\n",
+    "        \"product_description\": \"Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragraph of text for each item you choose.\",\n",
+    "        \"price\": 99.99,\n",
+    "        \"url\": \"the url as provided\"\n",
+    "    },\n",
+    "    ...\n",
+    "]}\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e4223a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user_prompt = \"\"\"Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
+    "Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
+    "Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
+    "Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
+    "\n",
+    "Deals:\n",
+    "\n",
+    "\"\"\"\n",
+    "user_prompt += '\\n\\n'.join([deal.describe() for deal in deals])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1395a64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(user_prompt[:2000])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e2287b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_recommendations():\n",
+    "    completion = openai.beta.chat.completions.parse(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": user_prompt}\n",
+    "        ],\n",
+    "        response_format=DealSelection\n",
+    "    )\n",
+    "    result = completion.choices[0].message.parsed\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4341005",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result = get_recommendations()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04a16f22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(result.deals)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "664e37f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result.deals[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89a7a438",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.scanner_agent import ScannerAgent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93195299",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent = ScannerAgent()\n",
+    "result = agent.scan()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66a360ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77487599",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DB = \"products_vectorstore\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94aef186",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent = MessagingAgent()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4e43ea7d",
+   "metadata": {},
+   "source": [
+    "### Planning Agent "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c25239d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "DB = \"products_vectorstore\"\n",
+    "client = chromadb.PersistentClient(path=DB)\n",
+    "collection = client.get_or_create_collection('products')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8fb32dde",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "planner = PlanningAgent(collection)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8dcb93b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "planner.plan()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4a72326b",
+   "metadata": {},
+   "source": [
+    "### Gradio UI For Data Visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "385e69cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent_framework = DealAgentFramework()\n",
+    "agent_framework.init_agents_as_needed()\n",
+    "\n",
+    "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n",
+    "\n",
+    "    initial_deal = Deal(product_description=\"Example description\", price=100.0, url=\"https://cnn.com\")\n",
+    "    initial_opportunity = Opportunity(deal=initial_deal, estimate=200.0, discount=100.0)\n",
+    "    opportunities = gr.State([initial_opportunity])\n",
+    "\n",
+    "    def get_table(opps):\n",
+    "        return [[opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url] for opp in opps]\n",
+    "\n",
+    "    def do_select(opportunities, selected_index: gr.SelectData):\n",
+    "        row = selected_index.index[0]\n",
+    "        opportunity = opportunities[row]\n",
+    "        agent_framework.planner.messenger.alert(opportunity)\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        gr.Markdown('