Fixed Google Colab link in week 3 day 4, and latest week 8 updates

This commit is contained in:
Edward Donner
2024-09-27 08:35:09 -04:00
parent 95596c52f8
commit e02dca5058
18 changed files with 74561 additions and 858 deletions

84
week8_wip/agents/deals.py Normal file
View File

@@ -0,0 +1,84 @@
from pydantic import BaseModel
from typing import List
from bs4 import BeautifulSoup
import re
import feedparser
from tqdm import tqdm
import requests
import time
# RSS feed URLs on dealnews.com, one per product category.
# Deal.fetch() iterates over this list and parses each URL with feedparser.
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]
def extract(html_snippet):
    """Return the plain-text summary contained in an HTML snippet.

    Looks for a ``<div class="snippet summary">`` element. When one is found,
    its text is taken, any remaining markup is removed (first by re-parsing
    the text, then with a tag-stripping regex) and the result is trimmed.
    When no such element exists the snippet itself is used unchanged.

    In both cases newlines are replaced by single spaces before returning.
    """
    summary_div = BeautifulSoup(html_snippet, 'html.parser').find('div', class_='snippet summary')
    if not summary_div:
        # No recognizable summary element: fall back to the raw snippet.
        return html_snippet.replace('\n', ' ')

    text = summary_div.get_text(strip=True)
    # The extracted text may itself contain markup; parse it a second time.
    text = BeautifulSoup(text, 'html.parser').get_text()
    # Remove anything that still looks like a tag.
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
class Deal:
    """A deal built from one RSS feed entry and enriched from the deal's web page.

    The title, summary and URL come from the feed entry. The ``details`` and
    ``features`` texts are scraped from the ``content-section`` element of the
    page the entry links to.
    """

    # NOTE(review): `category` is declared but never assigned in this class.
    category: str
    title: str
    summary: str
    url: str
    item_id: int
    details: str
    features: str

    def __init__(self, entry, id):
        """Populate the deal from a feed entry and download its page.

        Args:
            entry: Feed entry supporting ``entry['title']``,
                ``entry['summary']`` and ``entry['links'][0]['href']``.
            id: Identifier stored as ``item_id``. (The name shadows the
                builtin but is kept so existing callers keep working.)

        Raises:
            requests.exceptions.RequestException: If the deal page cannot be
                downloaded, including when the request times out.
        """
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        self.item_id = id

        # Without a timeout a stalled server would block the scan forever.
        stuff = requests.get(self.url, timeout=30).content
        soup = BeautifulSoup(stuff, 'html.parser')
        section = soup.find('div', class_='content-section')
        # A page without the expected section yields empty details instead of
        # an AttributeError that would abort the whole fetch.
        content = section.get_text() if section is not None else ""
        content = content.replace('\nmore', '').replace('\n', ' ')

        # partition() splits at the FIRST "Features" only. The previous
        # two-name unpacking of split() raised ValueError whenever the word
        # appeared more than once. When the word is absent, details receives
        # the whole text and features is "".
        self.details, _, self.features = content.partition("Features")

    def __repr__(self):
        """Return a short representation showing the title."""
        return f"<{self.title}>"

    def describe(self):
        """Return a multi-line text with title, details, features and URL."""
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls):
        """Build deals from the first 10 entries of every feed in ``feeds``.

        Item ids are assigned sequentially starting at 1001. Creating a deal
        downloads its page, so the loop sleeps one second after each one.

        Returns:
            A list of ``Deal`` instances in feed order.
        """
        deals = []
        item_id = 1001
        for feed_url in tqdm(feeds):
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                deals.append(cls(entry, item_id))
                item_id += 1
                time.sleep(1)
        return deals
# One deal picked out by the scanner model.
# Comments are used instead of a docstring on purpose: pydantic copies class
# docstrings into the generated JSON schema.
class QualityDeal(BaseModel):
    # Text describing the product
    product_description: str
    # Price of the deal as a number
    price: float
    # Link to the deal
    url: str
# Container for a list of QualityDeal objects.
# Comments are used instead of a docstring on purpose: pydantic copies class
# docstrings into the generated JSON schema.
class QualityDealSelection(BaseModel):
    # The selected deals
    quality_deals: List[QualityDeal]
# A deal paired with a price estimate for the product.
# Comments are used instead of a docstring on purpose: pydantic copies class
# docstrings into the generated JSON schema.
class Opportunity(BaseModel):
    # The deal being evaluated
    quality_deal: QualityDeal
    # Estimated price of the product
    estimate: float
    # presumably estimate minus the deal's price — confirm against the code that builds Opportunity
    discount: float

View File

@@ -0,0 +1,29 @@
import pandas as pd
from sklearn.linear_model import LinearRegression
import joblib
from agents.specialist_agent import SpecialistAgent
from agents.frontier_agent import FrontierAgent
from agents.random_forest_agent import RandomForestAgent
class EnsembleAgent:
    """Blend the estimates of three pricing agents with a pre-trained model."""

    def __init__(self, collection):
        """Create the three underlying agents and load the ensemble model.

        Args:
            collection: Passed through to ``FrontierAgent``.
        """
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        # NOTE: joblib.load unpickles the file; only load model files you trust.
        self.model = joblib.load('ensemble_model.pkl')

    def price(self, description):
        """Return the ensemble's price estimate for a product description.

        Each agent is asked for its own estimate. Those three values plus
        their minimum and maximum form a single-row DataFrame that is fed to
        the loaded model; the first prediction is returned.
        """
        estimates = {
            'Specialist': self.specialist.price(description),
            'Frontier': self.frontier.price(description),
            'RandomForest': self.random_forest.price(description),
        }
        row = dict(estimates)
        row['Min'] = min(estimates.values())
        row['Max'] = max(estimates.values())
        # One row; column names and order are what the model is given.
        features = pd.DataFrame({column: [value] for column, value in row.items()})
        return self.model.predict(features)[0]

View File

@@ -0,0 +1,63 @@
# imports
import os
import re
import math
import json
from typing import List
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from items import Item
from testing import Tester
class FrontierAgent:
    """Estimate prices with an OpenAI chat model, using similar items as context."""

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """Set up the OpenAI client, the item collection and the text encoder.

        Args:
            collection: Object whose ``query`` method is used to look up
                similar items by embedding.
        """
        self.openai = OpenAI()
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def make_context(self, similars: List[str], prices: List[float]):
        """Return the prompt section that lists similar products with prices."""
        intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        entries = [
            f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
            for similar, price in zip(similars, prices)
        ]
        return intro + "".join(entries)

    def messages_for(self, description: str, similars: List[str], prices: List[float]):
        """Build the chat messages: system, user question, assistant prefix."""
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = (
            self.make_context(similars, prices)
            + "And now the question for you:\n\n"
            + "How much does this cost?\n\n"
            + description
        )
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"},
        ]

    def find_similars(self, description: str):
        """Return ``(documents, prices)`` for the 5 closest items in the collection."""
        embedding = self.model.encode([description])
        results = self.collection.query(
            query_embeddings=embedding.astype(float).tolist(),
            n_results=5,
        )
        # Results are nested per query; only one query was sent.
        documents = results['documents'][0][:]
        prices = [metadata['price'] for metadata in results['metadatas'][0][:]]
        return documents, prices

    def get_price(self, s) -> float:
        """Return the first number found in ``s``, or 0.0 if there is none.

        Dollar signs and commas are removed before searching.
        """
        cleaned = s.replace('$', '').replace(',', '')
        found = re.search(r"[-+]?\d*\.\d+|\d+", cleaned)
        if found is None:
            return 0.0
        return float(found.group())

    def price(self, description: str) -> float:
        """Ask the chat model for a price, giving it similar items as context."""
        documents, prices = self.find_similars(description)
        response = self.openai.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5,
        )
        return self.get_price(response.choices[0].message.content)

View File

@@ -0,0 +1,28 @@
import os
from twilio.rest import Client
from agents.deals import Opportunity
class MessagingAgent:
    """Send WhatsApp messages about deals through a Twilio client."""

    def __init__(self):
        """Read credentials and the recipient number from the environment.

        Uses ``TWILIO_ACCOUNT_SID``, ``TWILIO_AUTH_TOKEN`` and
        ``MY_PHONE_NUMBER``; each falls back to a placeholder string when the
        variable is not set.
        """
        sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
        token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
        phone = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
        self.me_from = 'whatsapp:+14155238886'
        # The recipient number is prefixed with "+1".
        self.me_to = f"whatsapp:+1{phone}"
        self.client = Client(sid, token)

    def message(self, text):
        """Send ``text`` from ``me_from`` to ``me_to``."""
        self.client.messages.create(
            from_=self.me_from,
            body=text,
            to=self.me_to,
        )

    def alert(self, opportunity: Opportunity):
        """Send a one-line alert describing ``opportunity``.

        The line contains the deal price, the estimate, the first 10
        characters of the product description and the deal URL.
        """
        deal = opportunity.quality_deal
        text = (
            f"Deal! Price=${deal.price:.2f}, "
            f"Estimate=${opportunity.estimate:.2f} :"
            + deal.product_description[:10] + '... '
            + deal.url
        )
        self.message(text)

View File

@@ -0,0 +1,24 @@
from agents.deals import Deal, QualityDealSelection, Opportunity
from agents.scanner_agent import ScannerAgent
from agents.ensemble_agent import EnsembleAgent
from agents.messaging_agent import MessagingAgent
class PlanningAgent:
    """Scan for deals, estimate their value and alert on the best one."""

    def __init__(self, collection):
        """Create the scanner, ensemble pricer and messenger.

        Args:
            collection: Passed through to ``EnsembleAgent``.
        """
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()

    def plan(self):
        """Run one scan-estimate-alert cycle.

        Up to 5 scanned deals are priced by the ensemble. Each becomes an
        ``Opportunity`` whose discount is the estimate minus the deal price.
        The opportunities are printed, and the one with the largest discount
        is sent to the messenger when that discount exceeds 50. Nothing is
        sent when the scan produced no deals.
        """
        opportunities = []
        deal_selection = self.scanner.scan()
        for deal in deal_selection.quality_deals[:5]:
            estimate = self.ensemble.price(deal.product_description)
            # Opportunity is a pydantic model: its fields must be passed by
            # keyword; positional arguments raise TypeError.
            opportunities.append(
                Opportunity(
                    quality_deal=deal,
                    estimate=estimate,
                    discount=estimate - deal.price,
                )
            )
        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
        print(opportunities)
        # Guard against an empty scan result before looking at the best entry.
        if opportunities and opportunities[0].discount > 50:
            self.messenger.alert(opportunities[0])

View File

@@ -0,0 +1,18 @@
# imports
import os
import re
from typing import List
from sentence_transformers import SentenceTransformer
import joblib
class RandomForestAgent:
    """Estimate prices from sentence embeddings with a pre-trained model."""

    def __init__(self):
        """Load the sentence encoder and the saved model."""
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        # NOTE: joblib.load unpickles the file; only load model files you trust.
        self.model = joblib.load('random_forest_model.pkl')

    def price(self, description: str) -> float:
        """Return the model's prediction for ``description``, floored at 0."""
        embedding = self.vectorizer.encode([description])
        predicted = self.model.predict(embedding)[0]
        return max(0, predicted)

View File

@@ -0,0 +1,46 @@
import os
import json
from openai import OpenAI
from agents.deals import Deal, QualityDealSelection
class ScannerAgent:
    """Use an OpenAI model to pick the best-described deals from the feeds."""

    MODEL = "gpt-4o-mini"

    # The continuation lines of the two prompts below are part of the string
    # contents, so they intentionally start at the left margin.
    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
{"quality_deals": [
{
"product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
"price": 99.99,
"url": "the url as provided"
},
...
]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
Deals:
"""

    def __init__(self):
        """Create the OpenAI client."""
        self.openai = OpenAI()

    def scan(self) -> QualityDealSelection:
        """Fetch the current deals and return the model's parsed selection.

        The descriptions of all fetched deals are appended to the user prompt,
        separated by blank lines, and the response is parsed into a
        ``QualityDealSelection``.
        """
        deals = Deal.fetch()
        listing = '\n\n'.join(deal.describe() for deal in deals)
        messages = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": self.USER_PROMPT_PREFIX + listing},
        ]
        completion = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=messages,
            response_format=QualityDealSelection,
        )
        return completion.choices[0].message.parsed

View File

@@ -0,0 +1,10 @@
import modal
class SpecialistAgent:
    """Price items by calling the remote ``Pricer`` class on Modal."""

    def __init__(self):
        """Look up the ``Pricer`` class of the "pricer-service" app and instantiate it."""
        pricer_cls = modal.Cls.lookup("pricer-service", "Pricer")
        self.pricer = pricer_cls()

    def price(self, description: str) -> float:
        """Return the result of the remote ``price`` call for ``description``."""
        remote_price = self.pricer.price.remote
        return remote_price(description)