Submission for Week 8
week8/community_contributions/tochi/agents/agent.py (new file, 33 lines)

import logging


class Agent:
    """
    An abstract superclass for Agents
    Used to log messages in a way that can identify each Agent
    """

    # Foreground colors
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # Background color
    BG_BLACK = '\033[40m'

    # Reset code to return to default color
    RESET = '\033[0m'

    name: str = ""
    color: str = '\033[37m'

    def log(self, message):
        """
        Log this as an info message, identifying the agent
        """
        color_code = self.BG_BLACK + self.color
        message = f"[{self.name}] {message}"
        logging.info(color_code + message + self.RESET)
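
A minimal usage sketch (the subclass name is hypothetical): a concrete agent just sets name and color, then calls self.log:

import logging
from agents.agent import Agent

class DemoAgent(Agent):      # hypothetical subclass, for illustration only
    name = "Demo Agent"
    color = Agent.CYAN

logging.basicConfig(level=logging.INFO)
DemoAgent().log("hello")     # logs "[Demo Agent] hello" in cyan on black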
week8/community_contributions/tochi/agents/deals.py (new file, 233 lines)

import os
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import List, Dict, Optional, Self
import feedparser
from tqdm import tqdm
import time
from openai import OpenAI
import json


load_dotenv(override=True)
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "your-key-if-not-using-env")

openai = OpenAI()

feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
    "https://www.reddit.com/r/buildapcsales.rss",
    "https://www.reddit.com/r/deals.rss",
]

SYSTEM_PROMPT = """
You are an RSS feed parser specializing in extracting deal information. Your task is to analyze content and extract structured data.

# INPUT TYPES
You will receive one of two input types:

**TYPE 1: RSS Feed Entry Data**
- May contain fields like: title, summary, description, link
- Summary/description often contains HTML with deal details
- Multiple URL fields may exist (link, links array, etc.)

**TYPE 2: HTML Page Content**
- Raw HTML from a deal webpage
- Contains product information, pricing, and purchase links

# TASK
Extract and structure the following information:
1. **title**: The deal's headline or main title
   - For RSS entries: Use the entry's title field directly
   - For HTML: Extract the main product/deal title

2. **summary**: A concise summary of the deal (2-3 sentences max), focusing on:
   - What is being offered (product name, specs)
   - Key terms (price, discount percentage, original price)
   - Important conditions (promo codes, shipping, availability, refurb/new condition)
   - Strip ALL HTML tags and formatting

3. **url**: The primary link where users can access the deal
   - Prioritize direct product/deal purchase links
   - Avoid tracking links, RSS links with "?rss=1" or "?iref=rss"
   - For RSS entries, use the "link" field or first link in "links" array

# EXTRACTION RULES
- **From RSS entries**: Parse the 'summary' or 'description' HTML to extract deal details
- **Clean all HTML**: Remove <img>, <div>, <p>, <ul>, <li>, and all other tags
- **Extract pricing**: Include specific dollar amounts, percentages, and comparisons
- **Extract conditions**: Note promo codes, refurb status, warranty info, shipping details
- **URL priority**: Direct deal link > product page > category page
- **Handle missing data**: Use null for any truly missing required field

# OUTPUT FORMAT
Return ONLY valid JSON with this exact structure:
{
    "title": "string",
    "summary": "string",
    "url": "string"
}

Do not include any additional text, explanations, or markdown formatting - only the JSON object.

# EXAMPLES

**Input (RSS Entry)**:
```
title: "Sony Headphones for $99 + free shipping"
summary: "<p>Was $199, now $99. Use code SAVE50.</p>"
link: "https://example.com/deal?iref=rss-c142"
```

**Output**:
```json
{
    "title": "Sony Headphones for $99 + free shipping",
    "summary": "Sony Headphones originally priced at $199, now available for $99 with free shipping. Use promo code SAVE50 at checkout.",
    "url": "https://example.com/deal"
}
```
"""


def gpt_parse(soup: str) -> Optional[Dict[str, str]]:
    """
    Parse RSS feed content using GPT to extract title, summary, and URL.

    Args:
        soup: Raw RSS feed content (HTML/text)

    Returns:
        Dictionary with title, summary, url keys or None if parsing fails
    """

    text_to_summarize = soup
    if not text_to_summarize:
        return None

    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            temperature=0.2,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": text_to_summarize},
            ],
        )
        res_text = response.choices[0].message.content
        parsed_data = json.loads(res_text)

        if all(
            key in parsed_data and parsed_data[key]
            for key in ["title", "summary", "url"]
        ):
            return {
                "title": parsed_data["title"],
                "summary": parsed_data["summary"],
                "url": parsed_data["url"],
            }
        else:
            print(f"Missing or empty required fields in response: {parsed_data}")
            return None

    except json.JSONDecodeError as e:
        print(f"Error parsing JSON from OpenAI response: {e}")
        return None
    except Exception as e:
        print(f"Error calling OpenAI: {e}")
        return None


class ScrapedDeal:
    """
    A class to represent a Deal retrieved from an RSS feed
    """

    category: str
    title: str
    summary: str
    url: str
    details: str
    features: str

    def __init__(self, entry: Dict[str, str]):
        """
        Populate this instance based on the provided dict
        """
        self.title = entry["title"]
        self.summary = entry["summary"]
        self.url = entry["url"]
        self.details = self.summary
        self.features = ""

    def __repr__(self):
        """
        Return a string to describe this deal
        """
        return f"<{self.title}>"

    def describe(self):
        """
        Return a longer string to describe this deal for use in calling a model
        """
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls, show_progress: bool = False) -> List[Self]:
        """
        Retrieve all deals from the selected RSS feeds
        """
        deals = []
        skipped = 0

        feed_iter = tqdm(feeds) if show_progress else feeds
        for feed_url in feed_iter:
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                try:
                    # default=str keeps non-JSON values (e.g. feedparser's parsed timestamps) serializable
                    parsed_deal = gpt_parse(json.dumps(entry, default=str))
                    if parsed_deal is None:
                        skipped += 1
                        continue
                    # append only the GPT-parsed deal; the raw feedparser entry has no
                    # "url" key, so constructing cls(entry) directly would always fail
                    deals.append(cls(parsed_deal))
                    time.sleep(0.5)
                except Exception as e:
                    skipped += 1
                    print(f"Skipping deal: {str(e)}")
                    continue

        print(f"Fetched {len(deals)} deals successfully, skipped {skipped}")
        return deals


class Deal(BaseModel):
    """
    A class to represent a Deal with a summary description
    """

    product_description: str
    price: float
    url: str


class DealSelection(BaseModel):
    """
    A class to represent a list of Deals
    """

    deals: List[Deal]


class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """

    deal: Deal
    estimate: float
    discount: float
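
For reference, a quick way to exercise this module from a REPL, assuming OPENAI_API_KEY is set (each feed entry costs one gpt-4o-mini call, so expect it to take a while):

from agents.deals import ScrapedDeal

deals = ScrapedDeal.fetch(show_progress=True)   # runs every feed entry through gpt_parse
print(deals[0].describe())                      # the text handed to downstream models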
week8/community_contributions/tochi/agents/ensemble_agent.py (new file, 48 lines)

import pandas as pd
from sklearn.linear_model import LinearRegression
import joblib

from agents.agent import Agent
from agents.specialist_agent import SpecialistAgent
from agents.frontier_agent import FrontierAgent
from agents.random_forest_agent import RandomForestAgent


class EnsembleAgent(Agent):

    name = "Ensemble Agent"
    color = Agent.YELLOW

    def __init__(self, collection):
        """
        Create an instance of Ensemble, by creating each of the models
        And loading the weights of the Ensemble
        """
        self.log("Initializing Ensemble Agent")
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')
        self.log("Ensemble Agent is ready")

    def price(self, description: str) -> float:
        """
        Run this ensemble model
        Ask each of the models to price the product
        Then use the Linear Regression model to return the weighted price
        :param description: the description of a product
        :return: an estimate of its price
        """
        self.log("Running Ensemble Agent - collaborating with specialist, frontier and random forest agents")
        specialist = self.specialist.price(description)
        frontier = self.frontier.price(description)
        random_forest = self.random_forest.price(description)
        X = pd.DataFrame({
            'Specialist': [specialist],
            'Frontier': [frontier],
            'RandomForest': [random_forest],
            'Min': [min(specialist, frontier, random_forest)],
            'Max': [max(specialist, frontier, random_forest)],
        })
        y = max(0, self.model.predict(X)[0])
        self.log(f"Ensemble Agent complete - returning ${y:.2f}")
        return y
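
The code that produced ensemble_model.pkl is not part of this diff. Given the LinearRegression import above and the feature columns built in price(), a plausible training sketch looks like this (specialist_preds, frontier_preds, random_forest_preds and true_prices are assumed to be precomputed over a training set; the column names and order must match price() exactly):

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression

X = pd.DataFrame({
    "Specialist": specialist_preds,      # each agent's predictions, one per training item
    "Frontier": frontier_preds,
    "RandomForest": random_forest_preds,
})
X["Min"] = X.min(axis=1)
X["Max"] = X.max(axis=1)

model = LinearRegression()
model.fit(X, true_prices)                # true_prices: ground-truth prices
joblib.dump(model, "ensemble_model.pkl")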
week8/community_contributions/tochi/agents/frontier_agent.py (new file, 113 lines)

# imports

import os
import re
import math
import json
from typing import List, Dict
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from items import Item
from testing import Tester
from agents.agent import Agent


class FrontierAgent(Agent):

    name = "Frontier Agent"
    color = Agent.BLUE

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        Set up this instance by connecting to OpenAI or DeepSeek, to the Chroma Datastore,
        And setting up the vector encoding model
        """
        self.log("Initializing Frontier Agent")
        deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
        if deepseek_api_key:
            self.client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
            self.MODEL = "deepseek-chat"
            self.log("Frontier Agent is set up with DeepSeek")
        else:
            self.client = OpenAI()
            self.MODEL = "gpt-4o-mini"
            self.log("Frontier Agent is setting up with OpenAI")
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.log("Frontier Agent is ready")

    def make_context(self, similars: List[str], prices: List[float]) -> str:
        """
        Create context that can be inserted into the prompt
        :param similars: similar products to the one being estimated
        :param prices: prices of the similar products
        :return: text to insert in the prompt that provides context
        """
        message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        for similar, price in zip(similars, prices):
            message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        return message

    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
        """
        Create the message list to be included in a call to OpenAI
        With the system and user prompt
        :param description: a description of the product
        :param similars: similar products to this one
        :param prices: prices of similar products
        :return: the list of messages in the format expected by OpenAI
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"}
        ]

    def find_similars(self, description: str):
        """
        Return a list of items similar to the given one by looking in the Chroma datastore
        """
        self.log("Frontier Agent is performing a RAG search of the Chroma datastore to find 5 similar products")
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        documents = results['documents'][0][:]
        prices = [m['price'] for m in results['metadatas'][0][:]]
        self.log("Frontier Agent has found similar products")
        return documents, prices

    def get_price(self, s) -> float:
        """
        A utility that plucks a floating point number out of a string
        """
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        """
        Make a call to OpenAI or DeepSeek to estimate the price of the described product,
        by looking up 5 similar products and including them in the prompt to give context
        :param description: a description of the product
        :return: an estimate of the price
        """
        documents, prices = self.find_similars(description)
        self.log(f"Frontier Agent is about to call {self.MODEL} with context including 5 similar products")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        result = self.get_price(reply)
        self.log(f"Frontier Agent completed - predicting ${result:.2f}")
        return result
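
find_similars assumes the Chroma collection holds one document per product, embedded with the same all-MiniLM-L6-v2 model, with the price (and, per deal_agent_framework.py, a category) in each document's metadata. A sketch of that ingestion, with hypothetical variable names:

import chromadb
from sentence_transformers import SentenceTransformer

client = chromadb.PersistentClient(path="products_vectorstore")
collection = client.get_or_create_collection("products")
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# products: an assumed list of (description, price, category) tuples
for i, (description, price, category) in enumerate(products):
    collection.add(
        ids=[f"doc_{i}"],
        documents=[description],
        embeddings=encoder.encode([description]).astype(float).tolist(),
        metadatas=[{"price": price, "category": category}],
    )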
week8/community_contributions/tochi/agents/messaging_agent.py (new file, 79 lines)

import os
# from twilio.rest import Client
from agents.deals import Opportunity
import http.client
import urllib.parse
from agents.agent import Agent

# Uncomment the Twilio lines if you wish to use Twilio

DO_TEXT = False
DO_PUSH = True


class MessagingAgent(Agent):

    name = "Messaging Agent"
    color = Agent.WHITE

    def __init__(self):
        """
        Set up this object to either do push notifications via Pushover,
        or SMS via Twilio,
        whichever is specified in the constants
        """
        self.log("Messaging Agent is initializing")
        if DO_TEXT:
            account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
            auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
            self.me_from = os.getenv('TWILIO_FROM', 'your-phone-number-if-not-using-env')
            self.me_to = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
            # self.client = Client(account_sid, auth_token)
            self.log("Messaging Agent has initialized Twilio")
        if DO_PUSH:
            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-token-if-not-using-env')
            self.log("Messaging Agent has initialized Pushover")

    def message(self, text):
        """
        Send an SMS message using the Twilio API
        """
        self.log("Messaging Agent is sending a text message")
        message = self.client.messages.create(
            from_=self.me_from,
            body=text,
            to=self.me_to
        )

    def push(self, text):
        """
        Send a Push Notification using the Pushover API
        """
        self.log("Messaging Agent is sending a push notification")
        conn = http.client.HTTPSConnection("api.pushover.net:443")
        conn.request("POST", "/1/messages.json",
                     urllib.parse.urlencode({
                         "token": self.pushover_token,
                         "user": self.pushover_user,
                         "message": text,
                         "sound": "cashregister"
                     }), {"Content-type": "application/x-www-form-urlencoded"})
        conn.getresponse()

    def alert(self, opportunity: Opportunity):
        """
        Make an alert about the specified Opportunity
        """
        text = f"Deal Alert! Price=${opportunity.deal.price:.2f}, "
        text += f"Estimate=${opportunity.estimate:.2f}, "
        text += f"Discount=${opportunity.discount:.2f} : "
        text += opportunity.deal.product_description[:10] + '... '
        text += opportunity.deal.url
        if DO_TEXT:
            self.message(text)
        if DO_PUSH:
            self.push(text)
        self.log("Messaging Agent has completed")
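
With the DO_PUSH default, the agent only needs PUSHOVER_USER and PUSHOVER_TOKEN in the environment (or .env). A quick smoke test:

from agents.messaging_agent import MessagingAgent

agent = MessagingAgent()   # picks up PUSHOVER_USER / PUSHOVER_TOKEN
agent.push("Test notification from the deal hunter")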
week8/community_contributions/tochi/agents/planning_agent.py (new file, 57 lines)

from typing import Optional, List
from agents.agent import Agent
from agents.deals import ScrapedDeal, DealSelection, Deal, Opportunity
from agents.scanner_agent import ScannerAgent
from agents.ensemble_agent import EnsembleAgent
from agents.messaging_agent import MessagingAgent


class PlanningAgent(Agent):

    name = "Planning Agent"
    color = Agent.GREEN
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """
        Create instances of the 3 Agents that this planner coordinates across
        """
        self.log("Planning Agent is initializing")
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()
        self.log("Planning Agent is ready")

    def run(self, deal: Deal) -> Opportunity:
        """
        Run the workflow for a particular deal
        :param deal: the deal, summarized from an RSS scrape
        :returns: an opportunity including the discount
        """
        self.log("Planning Agent is pricing up a potential deal")
        estimate = self.ensemble.price(deal.product_description)
        discount = estimate - deal.price
        self.log(f"Planning Agent has processed a deal with discount ${discount:.2f}")
        return Opportunity(deal=deal, estimate=estimate, discount=discount)

    def plan(self, memory: List[Opportunity] = []) -> Optional[Opportunity]:
        """
        Run the full workflow:
        1. Use the ScannerAgent to find deals from RSS feeds
        2. Use the EnsembleAgent to estimate them
        3. Use the MessagingAgent to send a notification of deals
        :param memory: a list of Opportunities surfaced in the past (their URLs are used to avoid repeats)
        :return: an Opportunity if one was surfaced, otherwise None
        """
        self.log("Planning Agent is kicking off a run")
        selection = self.scanner.scan(memory=memory)
        if selection:
            opportunities = [self.run(deal) for deal in selection.deals[:5]]
            opportunities.sort(key=lambda opp: opp.discount, reverse=True)
            best = opportunities[0]
            self.log(f"Planning Agent has identified the best deal has discount ${best.discount:.2f}")
            if best.discount > self.DEAL_THRESHOLD:
                self.messenger.alert(best)
            self.log("Planning Agent has completed a run")
            return best if best.discount > self.DEAL_THRESHOLD else None
        return None
week8/community_contributions/tochi/agents/random_forest_agent.py (new file, 37 lines)

# imports

import os
import re
from typing import List
from sentence_transformers import SentenceTransformer
import joblib
from agents.agent import Agent


class RandomForestAgent(Agent):

    name = "Random Forest Agent"
    color = Agent.MAGENTA

    def __init__(self):
        """
        Initialize this object by loading in the saved model weights
        and the SentenceTransformer vector encoding model
        """
        self.log("Random Forest Agent is initializing")
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')
        self.log("Random Forest Agent is ready")

    def price(self, description: str) -> float:
        """
        Use a Random Forest model to estimate the price of the described item
        :param description: the product to be estimated
        :return: the price as a float
        """
        self.log("Random Forest Agent is starting a prediction")
        vector = self.vectorizer.encode([description])
        result = max(0, self.model.predict(vector)[0])
        self.log(f"Random Forest Agent completed - predicting ${result:.2f}")
        return result
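
As with the ensemble, the training of random_forest_model.pkl is outside this diff. Since price() feeds raw all-MiniLM-L6-v2 sentence vectors straight into the model, training plausibly looked like the following sketch (descriptions and prices are assumed training data):

import joblib
from sentence_transformers import SentenceTransformer
from sklearn.ensemble import RandomForestRegressor

encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
vectors = encoder.encode(descriptions)   # one vector per product description
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(vectors, prices)                  # prices: ground-truth prices
joblib.dump(rf, "random_forest_model.pkl")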
week8/community_contributions/tochi/agents/scanner_agent.py (new file, 94 lines)

import os
import json
from typing import Optional, List
from openai import OpenAI
from agents.deals import ScrapedDeal, DealSelection, Opportunity
from agents.agent import Agent


class ScannerAgent(Agent):

    MODEL = "gpt-4o-mini"
    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
    Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
    Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.

    {"deals": [
        {
            "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragraph of text for each item you choose.",
            "price": 99.99,
            "url": "the url as provided"
        },
        ...
    ]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price that is greater than 0.
    Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
    Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.

    Deals:

    """

    USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON and include exactly 5 deals, no more."

    name = "Scanner Agent"
    color = Agent.CYAN

    def __init__(self):
        """
        Set up this instance by initializing OpenAI
        """
        self.log("Scanner Agent is initializing")
        self.openai = OpenAI()
        self.log("Scanner Agent is ready")

    def fetch_deals(self, memory: List[Opportunity]) -> List[ScrapedDeal]:
        """
        Look up deals published on RSS feeds
        Return any new deals that are not already in the memory provided
        """
        self.log("Scanner Agent is about to fetch deals from RSS feed")
        urls = [opp.deal.url for opp in memory]
        scraped = ScrapedDeal.fetch()
        result = [scrape for scrape in scraped if scrape.url not in urls]
        self.log(f"Scanner Agent received {len(result)} deals not already scraped")
        return result

    def make_user_prompt(self, scraped) -> str:
        """
        Create a user prompt for OpenAI based on the scraped deals provided
        """
        user_prompt = self.USER_PROMPT_PREFIX
        user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
        user_prompt += self.USER_PROMPT_SUFFIX
        return user_prompt
    def scan(self, memory: List[Opportunity] = []) -> Optional[DealSelection]:
        """
        Call OpenAI to provide a high potential list of deals with good descriptions and prices
        Use StructuredOutputs to ensure it conforms to our specifications
        :param memory: a list of Opportunities already surfaced, so their deals are not raised again
        :return: a selection of good deals, or None if there aren't any
        """
        scraped = self.fetch_deals(memory)
        if scraped:
            user_prompt = self.make_user_prompt(scraped)
            self.log("Scanner Agent is calling OpenAI using Structured Output")
            result = self.openai.beta.chat.completions.parse(
                model=self.MODEL,
                messages=[
                    {"role": "system", "content": self.SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt}
                ],
                response_format=DealSelection
            )
            result = result.choices[0].message.parsed
            result.deals = [deal for deal in result.deals if deal.price > 0]
            self.log(f"Scanner Agent received {len(result.deals)} selected deals with price>0 from OpenAI")
            return result
        return None
week8/community_contributions/tochi/agents/specialist_agent.py (new file, 29 lines)

import modal
from agents.agent import Agent


class SpecialistAgent(Agent):
    """
    An Agent that runs our fine-tuned LLM that's running remotely on Modal
    """

    name = "Specialist Agent"
    color = Agent.RED

    def __init__(self):
        """
        Set up this Agent by creating an instance of the modal class
        """
        self.log("Specialist Agent is initializing - connecting to modal")
        # NB: this looks up a deployed Modal *class* named Pricer on the "pricer-service" app.
        # The pricer_service.py in this submission deploys a plain `price` function instead;
        # to call that version you would instead look it up with
        # modal.Function.from_name("pricer-service", "price") and call .remote(description) on it.
        Pricer = modal.Cls.from_name("pricer-service", "Pricer")
        self.pricer = Pricer()
        self.log("Specialist Agent is ready")

    def price(self, description: str) -> float:
        """
        Make a remote call to return the estimate of the price of this item
        """
        self.log("Specialist Agent is calling remote fine-tuned model")
        result = self.pricer.price.remote(description)
        self.log(f"Specialist Agent completed - predicting ${result:.2f}")
        return result
week8/community_contributions/tochi/autonomous_deal_agent.ipynb (new file, 1262 lines; diff suppressed because it is too large)
week8/community_contributions/tochi/deal_agent_framework.py (new file, 98 lines)

import os
import sys
import logging
import json
from typing import List
from dotenv import load_dotenv
import chromadb
from agents.planning_agent import PlanningAgent
from agents.deals import Opportunity
from sklearn.manifold import TSNE
import numpy as np


# Colors for logging
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'

# Colors for plot
CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics', 'Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green', 'purple', 'cyan']


def init_logging():
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "[%(asctime)s] [Agents] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    handler.setFormatter(formatter)
    root.addHandler(handler)


class DealAgentFramework:

    DB = "products_vectorstore"
    MEMORY_FILENAME = "memory.json"

    def __init__(self):
        init_logging()
        load_dotenv()
        client = chromadb.PersistentClient(path=self.DB)
        self.memory = self.read_memory()
        self.collection = client.get_or_create_collection('products')
        self.planner = None

    def init_agents_as_needed(self):
        if not self.planner:
            self.log("Initializing Agent Framework")
            self.planner = PlanningAgent(self.collection)
            self.log("Agent Framework is ready")

    def read_memory(self) -> List[Opportunity]:
        if os.path.exists(self.MEMORY_FILENAME):
            with open(self.MEMORY_FILENAME, "r") as file:
                data = json.load(file)
            opportunities = [Opportunity(**item) for item in data]
            return opportunities
        return []

    def write_memory(self) -> None:
        data = [opportunity.dict() for opportunity in self.memory]
        with open(self.MEMORY_FILENAME, "w") as file:
            json.dump(data, file, indent=2)

    def log(self, message: str):
        text = BG_BLUE + WHITE + "[Agent Framework] " + message + RESET
        logging.info(text)

    def run(self) -> List[Opportunity]:
        self.init_agents_as_needed()
        logging.info("Kicking off Planning Agent")
        result = self.planner.plan(memory=self.memory)
        logging.info(f"Planning Agent has completed and returned: {result}")
        if result:
            self.memory.append(result)
            self.write_memory()
        return self.memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        client = chromadb.PersistentClient(path=cls.DB)
        collection = client.get_or_create_collection('products')
        result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        vectors = np.array(result['embeddings'])
        documents = result['documents']
        categories = [metadata['category'] for metadata in result['metadatas']]
        colors = [COLORS[CATEGORIES.index(c)] for c in categories]
        tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
        reduced_vectors = tsne.fit_transform(vectors)
        return documents, reduced_vectors, colors


if __name__ == "__main__":
    DealAgentFramework().run()
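
Run headless with python deal_agent_framework.py; every surfaced Opportunity is appended to memory.json, which is what lets ScannerAgent skip URLs it has already raised. Inspecting the memory from a REPL:

from deal_agent_framework import DealAgentFramework

framework = DealAgentFramework()
for opp in framework.memory:   # Opportunities restored from memory.json
    print(f"${opp.discount:.2f} off: {opp.deal.url}")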
week8/community_contributions/tochi/items.py (new file, 101 lines)

from typing import Optional
from transformers import AutoTokenizer
import re

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
MIN_TOKENS = 150
MAX_TOKENS = 160
MIN_CHARS = 300
CEILING_CHARS = MAX_TOKENS * 7


class Item:
    """
    An Item is a cleaned, curated datapoint of a Product with a Price
    """

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Clean up the details string by removing common text that doesn't add value
        """
        details = self.details
        for remove in self.REMOVALS:
            details = details.replace(remove, "")
        return details

    def scrub(self, stuff):
        """
        Clean up the provided text by removing unnecessary characters and whitespace
        Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers
        """
        stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        stuff = stuff.replace(" ,", ",").replace(",,,", ",").replace(",,", ",")
        words = stuff.split(' ')
        select = [word for word in words if len(word) < 7 or not any(char.isdigit() for char in word)]
        return " ".join(select)

    def parse(self, data):
        """
        Parse this datapoint and if it fits within the allowed Token range,
        then set include to True
        """
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'
        if len(contents) > MIN_CHARS:
            contents = contents[:CEILING_CHARS]
            text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
            tokens = self.tokenizer.encode(text, add_special_tokens=False)
            if len(tokens) > MIN_TOKENS:
                tokens = tokens[:MAX_TOKENS]
                text = self.tokenizer.decode(tokens)
                self.make_prompt(text)
                self.include = True

    def make_prompt(self, text):
        """
        Set the prompt instance variable to be a prompt appropriate for training
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return a prompt suitable for testing, with the actual price removed
        """
        return self.prompt.split(self.PREFIX)[0] + self.PREFIX

    def __repr__(self):
        """
        Return a String version of this Item
        """
        return f"<{self.title} = ${self.price}>"
week8/community_contributions/tochi/log_utils.py
Normal file
35
week8/community_contributions/tochi/log_utils.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# Foreground colors
|
||||
RED = '\033[31m'
|
||||
GREEN = '\033[32m'
|
||||
YELLOW = '\033[33m'
|
||||
BLUE = '\033[34m'
|
||||
MAGENTA = '\033[35m'
|
||||
CYAN = '\033[36m'
|
||||
WHITE = '\033[37m'
|
||||
|
||||
# Background color
|
||||
BG_BLACK = '\033[40m'
|
||||
BG_BLUE = '\033[44m'
|
||||
|
||||
# Reset code to return to default color
|
||||
RESET = '\033[0m'
|
||||
|
||||
mapper = {
|
||||
BG_BLACK+RED: "#dd0000",
|
||||
BG_BLACK+GREEN: "#00dd00",
|
||||
BG_BLACK+YELLOW: "#dddd00",
|
||||
BG_BLACK+BLUE: "#0000ee",
|
||||
BG_BLACK+MAGENTA: "#aa00dd",
|
||||
BG_BLACK+CYAN: "#00dddd",
|
||||
BG_BLACK+WHITE: "#87CEEB",
|
||||
BG_BLUE+WHITE: "#ff7800"
|
||||
}
|
||||
|
||||
|
||||
def reformat(message):
|
||||
for key, value in mapper.items():
|
||||
message = message.replace(key, f'<span style="color: {value}">')
|
||||
message = message.replace(RESET, '</span>')
|
||||
return message
|
||||
|
||||
|
||||
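
reformat() turns the ANSI color pairs emitted by Agent.log into HTML spans for the Gradio log panel:

from log_utils import reformat, BG_BLACK, CYAN, RESET

line = BG_BLACK + CYAN + "[Scanner Agent] ready" + RESET
print(reformat(line))
# <span style="color: #00dddd">[Scanner Agent] ready</span>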
week8/community_contributions/tochi/price_is_right.py (new file, 62 lines)

import gradio as gr
from deal_agent_framework import DealAgentFramework
from agents.deals import Opportunity, Deal


class App:

    def __init__(self):
        self.agent_framework = None

    def run(self):
        with gr.Blocks(title="The Price is Right", fill_width=True) as ui:

            def table_for(opps):
                return [[opp.deal.product_description, f"${opp.deal.price:.2f}", f"${opp.estimate:.2f}", f"${opp.discount:.2f}", opp.deal.url] for opp in opps]

            def start():
                self.agent_framework = DealAgentFramework()
                self.agent_framework.init_agents_as_needed()
                opportunities = self.agent_framework.memory
                table = table_for(opportunities)
                return table

            def go():
                self.agent_framework.run()
                new_opportunities = self.agent_framework.memory
                table = table_for(new_opportunities)
                return table

            def do_select(selected_index: gr.SelectData):
                opportunities = self.agent_framework.memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.agent_framework.planner.messenger.alert(opportunity)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px">"The Price is Right" - Deal Hunting Agentic AI</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Deals surfaced so far:</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Description", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[4, 1, 1, 1, 2],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )

            ui.load(start, inputs=[], outputs=[opportunities_dataframe])

            timer = gr.Timer(value=60)
            timer.tick(go, inputs=[], outputs=[opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)


if __name__ == "__main__":
    App().run()
week8/community_contributions/tochi/price_is_right_final.py (new file, 166 lines)

import logging
import queue
import threading
import time

import gradio as gr
from deal_agent_framework import DealAgentFramework
from agents.deals import Opportunity, Deal
from log_utils import reformat
import plotly.graph_objects as go


class QueueHandler(logging.Handler):
    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue

    def emit(self, record):
        self.log_queue.put(self.format(record))


def html_for(log_data):
    output = '<br>'.join(log_data[-18:])
    return f"""
    <div id="scrollContent" style="height: 400px; overflow-y: auto; border: 1px solid #ccc; background-color: #222229; padding: 10px;">
    {output}
    </div>
    """


def setup_logging(log_queue):
    handler = QueueHandler(log_queue)
    formatter = logging.Formatter(
        "[%(asctime)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    handler.setFormatter(formatter)
    logger = logging.getLogger()
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)


class App:

    def __init__(self):
        self.agent_framework = None

    def get_agent_framework(self):
        if not self.agent_framework:
            self.agent_framework = DealAgentFramework()
            self.agent_framework.init_agents_as_needed()
        return self.agent_framework

    def run(self):
        with gr.Blocks(title="The Price is Right", fill_width=True) as ui:

            log_data = gr.State([])

            def table_for(opps):
                return [[opp.deal.product_description, f"${opp.deal.price:.2f}", f"${opp.estimate:.2f}", f"${opp.discount:.2f}", opp.deal.url] for opp in opps]

            def update_output(log_data, log_queue, result_queue):
                initial_result = table_for(self.get_agent_framework().memory)
                final_result = None
                while True:
                    try:
                        message = log_queue.get_nowait()
                        log_data.append(reformat(message))
                        yield log_data, html_for(log_data), final_result or initial_result
                    except queue.Empty:
                        try:
                            final_result = result_queue.get_nowait()
                            yield log_data, html_for(log_data), final_result or initial_result
                        except queue.Empty:
                            if final_result is not None:
                                break
                            time.sleep(0.1)

            def get_initial_plot():
                fig = go.Figure()
                fig.update_layout(
                    title='Loading vector DB...',
                    height=400,
                )
                return fig

            def get_plot():
                documents, vectors, colors = DealAgentFramework.get_plot_data(max_datapoints=1000)
                # Create the 3D scatter plot
                fig = go.Figure(data=[go.Scatter3d(
                    x=vectors[:, 0],
                    y=vectors[:, 1],
                    z=vectors[:, 2],
                    mode='markers',
                    marker=dict(size=2, color=colors, opacity=0.7),
                )])

                fig.update_layout(
                    scene=dict(xaxis_title='x',
                               yaxis_title='y',
                               zaxis_title='z',
                               aspectmode='manual',
                               aspectratio=dict(x=2.2, y=2.2, z=1),  # stretch the x and y axes
                               camera=dict(
                                   eye=dict(x=1.6, y=1.6, z=0.8)  # adjust camera position
                               )),
                    height=400,
                    margin=dict(r=5, b=1, l=5, t=2)
                )

                return fig

            def do_run():
                new_opportunities = self.get_agent_framework().run()
                table = table_for(new_opportunities)
                return table

            def run_with_logging(initial_log_data):
                log_queue = queue.Queue()
                result_queue = queue.Queue()
                setup_logging(log_queue)

                def worker():
                    result = do_run()
                    result_queue.put(result)

                thread = threading.Thread(target=worker)
                thread.start()

                for log_data, output, final_result in update_output(initial_log_data, log_queue, result_queue):
                    yield log_data, output, final_result

            def do_select(selected_index: gr.SelectData):
                opportunities = self.get_agent_framework().memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.get_agent_framework().planner.messenger.alert(opportunity)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px"><strong>The Price is Right</strong> - Autonomous Agent Framework that hunts for deals</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">A proprietary fine-tuned LLM deployed on Modal and a RAG pipeline with a frontier model collaborate to send push notifications with great online deals.</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Deals found so far", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[6, 1, 1, 1, 3],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )
            with gr.Row():
                with gr.Column(scale=1):
                    logs = gr.HTML()
                with gr.Column(scale=1):
                    plot = gr.Plot(value=get_plot(), show_label=False)

            ui.load(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            timer = gr.Timer(value=300, active=True)
            timer.tick(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)


if __name__ == "__main__":
    App().run()
week8/community_contributions/tochi/pricer_ephemeral.py (new file, 66 lines)

import modal
from modal import App, Image

# Setup

app = modal.App("pricer")
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft")
secrets = [modal.Secret.from_name("hf-secret")]

# Constants

GPU = "T4"
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner"  # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"


@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    import os
    import re
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    from peft import PeftModel

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"

    prompt = f"{QUESTION}\n{description}\n{PREFIX}"

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto"
    )

    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    result = tokenizer.decode(outputs[0])

    contents = result.split("Price is $")[1]
    contents = contents.replace(',', '')
    match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0
week8/community_contributions/tochi/pricer_service.py (new file, 69 lines)

import modal
from modal import App, Image

# Setup - define our infrastructure with code!

app = modal.App("pricer-service")
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft")

# This collects the secret from Modal.
# Depending on your Modal configuration, you may need to replace "hf-secret" with "huggingface-secret"
secrets = [modal.Secret.from_name("hf-secret")]

# Constants

GPU = "T4"
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner"  # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"


@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    import os
    import re
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    from peft import PeftModel

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"

    prompt = f"{QUESTION}\n{description}\n{PREFIX}"

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto"
    )

    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    result = tokenizer.decode(outputs[0])

    contents = result.split("Price is $")[1]
    contents = contents.replace(',', '')
    match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0
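
For reference: pricer_ephemeral.py above is intended for ad-hoc runs via the Modal CLI, while this file is meant to be deployed persistently (modal deploy pricer_service.py) so that SpecialistAgent can reach it by app name. Note that SpecialistAgent looks up a class named Pricer; since this file deploys a plain price function, a class-based variant of this service would need to be deployed for SpecialistAgent to work unchanged.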
week8/community_contributions/tochi/testing.py (new file, 75 lines)

import math
import matplotlib.pyplot as plt

GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
RESET = "\033[0m"
COLOR_MAP = {"red": RED, "orange": YELLOW, "green": GREEN}


class Tester:

    def __init__(self, predictor, data, title=None, size=250):
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        if error < 40 or error / truth < 0.2:
            return "green"
        elif error < 80 or error / truth < 0.4:
            return "orange"
        else:
            return "red"

    def run_datapoint(self, i):
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        log_error = math.log(truth + 1) - math.log(guess + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        max_error = max(self.errors)
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        self.error = 0
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        cls(function, data).run()
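
Typical use: wrap any pricing function and run it over held-out Items (test_data is assumed to be a list of Items with ground-truth .price set, at least `size` of them):

from testing import Tester

def my_predictor(item):        # hypothetical: any callable mapping an Item to a price
    return 100.0

Tester(my_predictor, test_data, size=250).run()
# or, with the defaults: Tester.test(my_predictor, test_data)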