Fixed Google Colab link in week 3 day 4, and latest week 8 updates
This commit is contained in:
84
week8_wip/agents/deals.py
Normal file
84
week8_wip/agents/deals.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import feedparser
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
import time
|
||||
|
||||
# dealnews.com RSS feeds that Deal.fetch() reads, one URL per product category
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]
|
||||
|
||||
def extract(html_snippet):
    """
    Return the readable text of an RSS summary snippet on a single line.

    If the snippet contains a <div class="snippet summary"> element, the text of
    that element is returned with markup removed. Otherwise the snippet is
    returned unchanged. In both cases newlines are replaced by spaces.
    """
    parsed = BeautifulSoup(html_snippet, 'html.parser')
    summary_div = parsed.find('div', class_='snippet summary')

    # No summary element: fall back to the raw snippet
    if not summary_div:
        return html_snippet.replace('\n', ' ')

    text = summary_div.get_text(strip=True)
    # Parse the extracted text once more, then strip anything that still looks like a tag
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
|
||||
|
||||
class Deal:
    """
    A deal read from an RSS feed entry and enriched with the text of its web page.

    Creating a Deal downloads the page at the entry's first link and splits the
    text of its 'content-section' element into details and features.
    """
    # NOTE: category is declared here but never assigned by this class
    category: str
    title: str
    summary: str
    url: str
    item_id: int
    details: str
    features: str

    # Seconds to wait for a deal page, so that one stalled request cannot hang a whole fetch
    REQUEST_TIMEOUT = 10

    def __init__(self, entry, id):
        """
        Populate the deal from a feed entry and download its page.

        :param entry: feed entry; its 'title', 'summary' and 'links' items are read
        :param id: identifier stored as item_id
        """
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        self.item_id = id
        page = requests.get(self.url, timeout=self.REQUEST_TIMEOUT).content
        soup = BeautifulSoup(page, 'html.parser')
        section = soup.find('div', class_='content-section')
        # A page without the expected section yields empty details rather than an AttributeError
        content = section.get_text() if section is not None else ""
        content = content.replace('\nmore', '').replace('\n', ' ')
        self.details, self.features = self._split_features(content)

    @staticmethod
    def _split_features(content):
        """
        Split page text into (details, features) at the first "Features" marker.

        Only the first occurrence is used, so text that mentions "Features" more
        than once still splits into exactly two parts. Without the marker the
        whole text becomes the details and the features are empty.
        """
        details, _, features = content.partition("Features")
        return details, features

    def __repr__(self):
        """Short representation showing only the title."""
        return f"<{self.title}>"

    def describe(self):
        """Return the title, details, features and URL as a multi-line string."""
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls):
        """
        Build a Deal for the first 10 entries of every feed in `feeds`.

        Item ids are assigned sequentially starting at 1001.

        :return: list of Deal objects in feed order
        """
        deals = []
        item_id = 1001
        for feed_url in tqdm(feeds):
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                deals.append(cls(entry, item_id))
                item_id += 1
                # Pause after each page download to avoid hammering the site
                time.sleep(1)
        return deals
|
||||
|
||||
# A single deal in the structured form requested from the language model.
# Comments are used instead of a docstring so the generated JSON schema is unchanged.
class QualityDeal(BaseModel):
    product_description: str  # summary of the product itself
    price: float              # price of the deal as a number
    url: str                  # link to the deal
|
||||
|
||||
# Wrapper holding the list of selected deals; used as the structured response format.
# Comments are used instead of a docstring so the generated JSON schema is unchanged.
class QualityDealSelection(BaseModel):
    quality_deals: List[QualityDeal]
|
||||
|
||||
# A deal paired with an estimated price and the resulting discount.
# Being a pydantic model, it must be constructed with keyword arguments.
class Opportunity(BaseModel):
    quality_deal: QualityDeal  # the deal being assessed
    estimate: float            # estimated price of the product
    discount: float            # difference between the estimate and the deal price
|
||||
29
week8_wip/agents/ensemble_agent.py
Normal file
29
week8_wip/agents/ensemble_agent.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import joblib
|
||||
|
||||
from agents.specialist_agent import SpecialistAgent
|
||||
from agents.frontier_agent import FrontierAgent
|
||||
from agents.random_forest_agent import RandomForestAgent
|
||||
|
||||
class EnsembleAgent:
    """
    Prices an item by feeding the estimates of three other agents into a
    regression model loaded from 'ensemble_model.pkl'.
    """

    def __init__(self, collection):
        """
        Create the three underlying agents and load the ensemble model.

        :param collection: handed to FrontierAgent
        """
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')

    def price(self, description):
        """
        Estimate the price of the described item.

        Each agent is asked in turn; the three answers plus their minimum and
        maximum form the single feature row passed to the model.

        :param description: text describing the item
        :return: first element of the model's prediction
        """
        # Insertion order fixes the column order of the feature frame
        estimates = {
            'Specialist': self.specialist.price(description),
            'Frontier': self.frontier.price(description),
            'RandomForest': self.random_forest.price(description),
        }
        row = {
            **estimates,
            'Min': min(estimates.values()),
            'Max': max(estimates.values()),
        }
        features = pd.DataFrame({column: [value] for column, value in row.items()})
        prediction = self.model.predict(features)
        return prediction[0]
|
||||
63
week8_wip/agents/frontier_agent.py
Normal file
63
week8_wip/agents/frontier_agent.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
import math
|
||||
import json
|
||||
from typing import List
|
||||
from openai import OpenAI
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from datasets import load_dataset
|
||||
import chromadb
|
||||
from items import Item
|
||||
from testing import Tester
|
||||
|
||||
class FrontierAgent:
    """
    Estimates prices with an OpenAI chat model, giving it the most similar
    items found in a vector collection as context.
    """

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        Create the OpenAI client and the sentence-embedding model.

        :param collection: vector store queried by find_similars
        """
        self.openai = OpenAI()
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def make_context(self, similars: List[str], prices: List[float]):
        """
        Build the part of the prompt that lists similar products with their prices.

        :param similars: descriptions of related products
        :param prices: price of each related product, in the same order
        :return: the context text; just the introduction if the lists are empty
        """
        intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        examples = "".join(
            f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
            for similar, price in zip(similars, prices)
        )
        return intro + examples

    def messages_for(self, description: str, similars: List[str], prices: List[float]):
        """
        Build the chat messages for a price estimate.

        :return: system, user and assistant messages; the assistant message
                 pre-fills "Price is $" so the reply continues with a number
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"}
        ]

    def find_similars(self, description: str):
        """
        Look up the 5 items in the collection closest to the description.

        :return: (documents, prices) where prices are read from each result's
                 'price' metadata entry
        """
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        documents = results['documents'][0][:]
        prices = [m['price'] for m in results['metadatas'][0][:]]
        return documents, prices

    def get_price(self, s) -> float:
        """
        Extract the first number from a model reply.

        :param s: reply text; may be None or empty when the model returned no content
        :return: the number as a float, or 0.0 if there is none
        """
        # A missing reply is handled like a reply without a number instead of raising
        if not s:
            return 0.0
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item.

        :param description: text describing the item
        :return: price parsed from the model reply, 0.0 if none could be parsed
        """
        documents, prices = self.find_similars(description)
        response = self.openai.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        return self.get_price(reply)
|
||||
|
||||
28
week8_wip/agents/messaging_agent.py
Normal file
28
week8_wip/agents/messaging_agent.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
from twilio.rest import Client
|
||||
from agents.deals import Opportunity
|
||||
|
||||
class MessagingAgent:
    """
    Sends WhatsApp messages about deals through a Twilio client.

    Credentials and the recipient number are read from the environment
    variables TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN and MY_PHONE_NUMBER.
    """

    def __init__(self):
        """Read the configuration from the environment and create the Twilio client."""
        sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
        token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
        phone = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
        self.me_from = 'whatsapp:+14155238886'
        # The +1 prefix is added here, so the variable holds the number without it
        self.me_to = f"whatsapp:+1{phone}"
        self.client = Client(sid, token)

    def message(self, text):
        """
        Send a message from me_from to me_to.

        :param text: body of the message
        """
        self.client.messages.create(to=self.me_to, from_=self.me_from, body=text)

    def alert(self, opportunity: "Opportunity"):
        """
        Send a one-line alert about an opportunity.

        The line holds the deal price, the estimate, the first 10 characters of
        the product description and the deal URL.
        """
        deal = opportunity.quality_deal
        parts = [
            f"Deal! Price=${deal.price:.2f}, ",
            f"Estimate=${opportunity.estimate:.2f} :",
            deal.product_description[:10] + '... ',
            deal.url,
        ]
        self.message("".join(parts))
|
||||
|
||||
|
||||
24
week8_wip/agents/planning_agent.py
Normal file
24
week8_wip/agents/planning_agent.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from agents.deals import Deal, QualityDealSelection, Opportunity
|
||||
|
||||
from agents.scanner_agent import ScannerAgent
|
||||
from agents.ensemble_agent import EnsembleAgent
|
||||
from agents.messaging_agent import MessagingAgent
|
||||
|
||||
|
||||
class PlanningAgent:
    """
    Coordinates one run: scan for deals, estimate what each is worth, and send
    an alert for the best one if the saving is large enough.
    """

    # An alert is sent only when the best discount exceeds this amount
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """
        Create the scanner, ensemble and messaging agents.

        :param collection: handed to EnsembleAgent
        """
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()

    def plan(self):
        """
        Run one scan / estimate / alert cycle.

        At most the first 5 scanned deals are priced. The resulting
        opportunities are sorted by discount (estimate minus deal price),
        largest first, and printed. If the largest discount exceeds
        DEAL_THRESHOLD, an alert is sent for it. Nothing is sent when the scan
        produced no deals.
        """
        deal_selection = self.scanner.scan()
        # The scanner may produce no selection at all; treat that like an empty one
        deals = deal_selection.quality_deals[:5] if deal_selection is not None else []

        opportunities = []
        for deal in deals:
            estimate = self.ensemble.price(deal.product_description)
            # Opportunity is a pydantic model, which only accepts keyword arguments
            opportunities.append(
                Opportunity(quality_deal=deal, estimate=estimate, discount=estimate - deal.price)
            )

        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
        print(opportunities)
        if opportunities and opportunities[0].discount > self.DEAL_THRESHOLD:
            self.messenger.alert(opportunities[0])
|
||||
18
week8_wip/agents/random_forest_agent.py
Normal file
18
week8_wip/agents/random_forest_agent.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import joblib
|
||||
|
||||
|
||||
class RandomForestAgent:
    """
    Prices an item by embedding its description and passing the embedding to
    the model loaded from 'random_forest_model.pkl'.
    """

    def __init__(self):
        """Load the sentence-embedding model and the saved regression model."""
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item.

        :param description: text describing the item
        :return: the model's prediction, or 0 when the prediction is not positive
        """
        embedding = self.vectorizer.encode([description])
        predicted = self.model.predict(embedding)[0]
        # Same result as max(0, predicted): never report a negative price
        return predicted if predicted > 0 else 0
|
||||
46
week8_wip/agents/scanner_agent.py
Normal file
46
week8_wip/agents/scanner_agent.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
import json
|
||||
from openai import OpenAI
|
||||
from agents.deals import Deal, QualityDealSelection
|
||||
|
||||
class ScannerAgent:
    """
    Fetches the current deals and asks an OpenAI model to pick and summarize
    the 5 with the most detailed description and clearest price.
    """

    MODEL = "gpt-4o-mini"

    # The prompt text below is runtime data. It is written flush-left so that no
    # source indentation ends up inside the prompts.
    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.

{"quality_deals": [
{
"product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
"price": 99.99,
"url": "the url as provided"
},
...
]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.

Deals:

"""

    def __init__(self):
        """Create the OpenAI client."""
        self.openai = OpenAI()

    def scan(self) -> "QualityDealSelection":
        """
        Fetch the deals and have the model select the best-described ones.

        The descriptions of all fetched deals are appended to USER_PROMPT_PREFIX,
        separated by blank lines, and the reply is parsed into a
        QualityDealSelection.

        :return: the parsed selection from the first completion choice
        """
        descriptions = [deal.describe() for deal in Deal.fetch()]
        user_prompt = self.USER_PROMPT_PREFIX + '\n\n'.join(descriptions)
        conversation = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ]
        completion = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=conversation,
            response_format=QualityDealSelection
        )
        return completion.choices[0].message.parsed
|
||||
10
week8_wip/agents/specialist_agent.py
Normal file
10
week8_wip/agents/specialist_agent.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import modal
|
||||
|
||||
class SpecialistAgent:
    """
    Prices an item by calling the "Pricer" class of the Modal app
    "pricer-service" remotely.
    """

    def __init__(self):
        """Look up the remote Pricer class and create an instance of it."""
        pricer_cls = modal.Cls.lookup("pricer-service", "Pricer")
        self.pricer = pricer_cls()

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item.

        :param description: text describing the item
        :return: the value returned by the remote call
        """
        remote_method = self.pricer.price
        return remote_method.remote(description)
|
||||
Reference in New Issue
Block a user