Fixed Google Colab link in week 3 day 4, and latest week 8 updates

2024-09-27 08:35:09 -04:00
parent 95596c52f8
commit e02dca5058
18 changed files with 74561 additions and 858 deletions
--- a/week8_wip/agents/deals.py
+++ b/week8_wip/agents/deals.py
@@ -0,0 +1,84 @@
+from pydantic import BaseModel
+from typing import List
+from bs4 import BeautifulSoup
+import re
+import feedparser
+from tqdm import tqdm
+import requests
+import time
+
+# DealNews RSS feed URLs, one per product category; Deal.fetch() iterates over this list.
+feeds = [
+    "https://www.dealnews.com/c142/Electronics/?rss=1",
+    "https://www.dealnews.com/c39/Computers/?rss=1",
+    "https://www.dealnews.com/c238/Automotive/?rss=1",
+    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
+    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
+]
+
+def extract(html_snippet):
+    """
+    Reduce the HTML summary of a feed entry to a single line of plain text.
+
+    The snippet is searched for a <div class="snippet summary"> element. When
+    one is present, its text is extracted, anything tag-like still left in
+    that text is removed, and the result is stripped. When no such element
+    exists, the snippet itself is used. Either way, newlines are replaced
+    with spaces in the returned string.
+    """
+    snippet_div = BeautifulSoup(html_snippet, 'html.parser').find('div', class_='snippet summary')
+    if not snippet_div:
+        return html_snippet.replace('\n', ' ')
+
+    text = snippet_div.get_text(strip=True)
+    # Second parse plus a regex sweep: drop anything that still looks like a
+    # tag after the first text extraction.
+    text = BeautifulSoup(text, 'html.parser').get_text()
+    text = re.sub('<[^<]+?>', '', text)
+    return text.strip().replace('\n', ' ')
+
+class Deal:
+    """
+    A deal built from one RSS feed entry plus the text of the page it links to.
+
+    Creating an instance performs an HTTP GET on the entry's first link in
+    order to fill in `details` and `features`.
+    """
+    category: str  # declared here, but never assigned by this class
+    title: str
+    summary: str
+    url: str
+    item_id: int
+    details: str
+    features: str
+
+    def __init__(self, entry, id):
+        """
+        Populate the deal from a feed entry and its linked page.
+
+        :param entry: feed entry providing 'title', 'summary' and 'links';
+            the 'href' of the first link becomes `url`
+        :param id: identifier stored as `item_id`
+        """
+        self.title = entry['title']
+        self.summary = extract(entry['summary'])
+        self.url = entry['links'][0]['href']
+        self.item_id = id
+        stuff = requests.get(self.url).content
+        soup = BeautifulSoup(stuff, 'html.parser')
+        section = soup.find('div', class_='content-section')
+        # find() returns None when the page has no content section; fall back
+        # to empty text instead of failing with AttributeError on None.
+        content = section.get_text() if section is not None else ""
+        content = content.replace('\nmore', '').replace('\n', ' ')
+        self.details, self.features = self._split_content(content)
+
+    @staticmethod
+    def _split_content(content):
+        """
+        Split page text into (details, features) at the first "Features" marker.
+
+        Text without the marker is returned as (content, ""). Splitting only
+        at the first occurrence keeps this safe when the word "Features"
+        appears more than once; an unbounded split would then yield more than
+        two parts and break the two-way unpacking.
+        """
+        details, _, features = content.partition("Features")
+        return details, features
+
+    def __repr__(self):
+        """Return the title wrapped in angle brackets."""
+        return f"<{self.title}>"
+
+    def describe(self):
+        """Return a multi-line text block with title, details, features and URL."""
+        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"
+
+    @classmethod
+    def fetch(cls):
+        """
+        Build deals from every URL in the module-level `feeds` list.
+
+        At most the first 10 entries of each feed are used. Item ids are
+        assigned sequentially starting at 1001, and the loop sleeps for one
+        second after each deal is created.
+
+        :return: list of instances of this class
+        """
+        deals = []
+        item_id = 1001
+        for feed_url in tqdm(feeds):
+            feed = feedparser.parse(feed_url)
+            for entry in feed.entries[:10]:
+                deals.append(cls(entry, item_id))
+                item_id += 1
+                time.sleep(1)
+        return deals
+
+# One selected deal: a product description, a numeric price and the deal URL.
+# The field names mirror the JSON format spelled out in ScannerAgent.SYSTEM_PROMPT.
+# NOTE(review): documented with comments rather than a docstring on purpose;
+# a docstring on a pydantic model presumably ends up in its JSON schema - confirm.
+class QualityDeal(BaseModel):
+    product_description: str
+    price: float
+    url: str
+
+# Wrapper holding a list of QualityDeal items. ScannerAgent.scan passes this
+# class as `response_format` and returns the parsed instance.
+class QualityDealSelection(BaseModel):
+    quality_deals: List[QualityDeal]
+
+# A deal paired with a price estimate. PlanningAgent.plan fills `discount`
+# with `estimate - quality_deal.price` and sorts opportunities by it.
+class Opportunity(BaseModel):
+    quality_deal: QualityDeal
+    estimate: float
+    discount: float
--- a/week8_wip/agents/ensemble_agent.py
+++ b/week8_wip/agents/ensemble_agent.py
@@ -0,0 +1,29 @@
+import pandas as pd
+from sklearn.linear_model import LinearRegression
+import joblib
+
+from agents.specialist_agent import SpecialistAgent
+from agents.frontier_agent import FrontierAgent
+from agents.random_forest_agent import RandomForestAgent
+
+class EnsembleAgent:
+    """
+    Blends the estimates of three pricing agents using a model loaded from
+    'ensemble_model.pkl'.
+    """
+
+    def __init__(self, collection):
+        """
+        Create the three underlying agents and load the blending model.
+
+        :param collection: passed through to FrontierAgent
+        """
+        self.specialist = SpecialistAgent()
+        self.frontier = FrontierAgent(collection)
+        self.random_forest = RandomForestAgent()
+        self.model = joblib.load('ensemble_model.pkl')
+
+    def price(self, description):
+        """
+        Estimate a price for `description`.
+
+        Each agent is asked in turn (specialist, frontier, random forest).
+        Their three answers, plus the smallest and largest of them, form a
+        one-row DataFrame that is handed to the loaded model; the first
+        element of its prediction is returned.
+        """
+        estimates = [
+            self.specialist.price(description),
+            self.frontier.price(description),
+            self.random_forest.price(description),
+        ]
+        # Column names and their order are kept exactly as the model receives them.
+        row = {
+            name: [value]
+            for name, value in zip(['Specialist', 'Frontier', 'RandomForest'], estimates)
+        }
+        row['Min'] = [min(estimates)]
+        row['Max'] = [max(estimates)]
+        features = pd.DataFrame(row)
+        return self.model.predict(features)[0]
--- a/week8_wip/agents/frontier_agent.py
+++ b/week8_wip/agents/frontier_agent.py
@@ -0,0 +1,63 @@
+# imports
+
+import os
+import re
+import math
+import json
+from typing import List
+from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+from datasets import load_dataset
+import chromadb
+from items import Item
+from testing import Tester
+
+class FrontierAgent:
+    """
+    Estimates a price by asking an OpenAI chat model, giving it the most
+    similar documents from a vector collection (with their prices) as context.
+    """
+
+    MODEL = "gpt-4o-mini"
+
+    def __init__(self, collection):
+        """
+        Set up the OpenAI client and the sentence-embedding model.
+
+        :param collection: object whose `query(query_embeddings=..., n_results=...)`
+            returns 'documents' and 'metadatas' (each metadata carrying 'price')
+        """
+        self.openai = OpenAI()
+        self.collection = collection
+        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
+    def make_context(self, similars: List[str], prices: List[float]):
+        """Return the context text: an intro line followed by one entry per (similar, price) pair."""
+        intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
+        entries = [
+            f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
+            for similar, price in zip(similars, prices)
+        ]
+        return intro + "".join(entries)
+
+    def messages_for(self, description: str, similars: List[str], prices: List[float]):
+        """
+        Build the chat messages: system instruction, user prompt (context
+        followed by the question and the description) and a final assistant
+        message containing "Price is $".
+        """
+        system_message = "You estimate prices of items. Reply only with the price, no explanation"
+        user_prompt = "".join([
+            self.make_context(similars, prices),
+            "And now the question for you:\n\n",
+            "How much does this cost?\n\n",
+            description,
+        ])
+        return [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_prompt},
+            {"role": "assistant", "content": "Price is $"}
+        ]
+
+    def find_similars(self, description: str):
+        """
+        Embed `description` and query the collection for 5 results.
+
+        :return: (documents, prices) taken from the first result set
+        """
+        embedding = self.model.encode([description]).astype(float).tolist()
+        results = self.collection.query(query_embeddings=embedding, n_results=5)
+        documents = results['documents'][0][:]
+        prices = [metadata['price'] for metadata in results['metadatas'][0]]
+        return documents, prices
+
+    def get_price(self, s) -> float:
+        """Return the first number found in `s` after removing '$' and ',', or 0.0 if there is none."""
+        cleaned = s.replace('$','').replace(',','')
+        found = re.search(r"[-+]?\d*\.\d+|\d+", cleaned)
+        if found is None:
+            return 0.0
+        return float(found.group())
+
+    def price(self, description: str) -> float:
+        """
+        Estimate the price of `description`.
+
+        Looks up similar items, sends the resulting messages to the chat
+        model (fixed seed, at most 5 tokens) and parses the reply as a number.
+        """
+        documents, prices = self.find_similars(description)
+        messages = self.messages_for(description, documents, prices)
+        response = self.openai.chat.completions.create(
+            model=self.MODEL,
+            messages=messages,
+            seed=42,
+            max_tokens=5
+        )
+        return self.get_price(response.choices[0].message.content)
+        
--- a/week8_wip/agents/messaging_agent.py
+++ b/week8_wip/agents/messaging_agent.py
@@ -0,0 +1,28 @@
+import os
+from twilio.rest import Client
+from agents.deals import Opportunity
+
+class MessagingAgent:
+    """Sends WhatsApp messages about deals through the Twilio client."""
+
+    def __init__(self):
+        """
+        Read Twilio credentials and the destination number from the
+        environment (TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, MY_PHONE_NUMBER),
+        falling back to placeholder strings, and create the client.
+        """
+        sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
+        token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
+        phone = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
+        self.me_from = 'whatsapp:+14155238886'
+        # The destination number is always prefixed with "+1".
+        self.me_to = f"whatsapp:+1{phone}"
+        self.client = Client(sid, token)
+
+    def message(self, text):
+        """Send `text` from `me_from` to `me_to`; nothing is returned."""
+        self.client.messages.create(from_=self.me_from, body=text, to=self.me_to)
+
+    def alert(self, opportunity: Opportunity):
+        """
+        Send a one-line summary of `opportunity`: the deal price, the
+        estimate, the first 10 characters of the product description and the
+        deal URL.
+        """
+        deal = opportunity.quality_deal
+        pieces = [
+            f"Deal! Price=${deal.price:.2f}, ",
+            f"Estimate=${opportunity.estimate:.2f} :",
+            deal.product_description[:10]+'... ',
+            deal.url,
+        ]
+        self.message("".join(pieces))
+    
+        
--- a/week8_wip/agents/planning_agent.py
+++ b/week8_wip/agents/planning_agent.py
@@ -0,0 +1,24 @@
+from agents.deals import Deal, QualityDealSelection, Opportunity
+
+from agents.scanner_agent import ScannerAgent
+from agents.ensemble_agent import EnsembleAgent
+from agents.messaging_agent import MessagingAgent
+
+
+class PlanningAgent:
+    """
+    Coordinates one run: scan for deals, estimate each one's price, and send
+    an alert for the best opportunity when its discount is large enough.
+    """
+
+    def __init__(self, collection):
+        """
+        Create the scanner, ensemble and messaging agents.
+
+        :param collection: passed through to EnsembleAgent
+        """
+        self.scanner = ScannerAgent()
+        self.ensemble = EnsembleAgent(collection)
+        self.messenger = MessagingAgent()
+
+    def plan(self):
+        """
+        Run one scan-estimate-alert cycle.
+
+        At most the first 5 selected deals are priced. Opportunities are
+        sorted by discount (estimate minus deal price), largest first, and
+        printed. An alert is sent for the top one only if its discount
+        exceeds 50. Nothing is returned.
+        """
+        opportunities = []
+        deal_selection = self.scanner.scan()
+        for deal in deal_selection.quality_deals[:5]:
+            estimate = self.ensemble.price(deal.product_description)
+            # Opportunity is a pydantic model: its fields must be passed by
+            # keyword, positional arguments raise TypeError.
+            opportunities.append(
+                Opportunity(
+                    quality_deal=deal,
+                    estimate=estimate,
+                    discount=estimate - deal.price,
+                )
+            )
+        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
+        print(opportunities)
+        # The scan may select no deals at all; indexing an empty list would
+        # raise IndexError, so check for emptiness first.
+        if opportunities and opportunities[0].discount > 50:
+            self.messenger.alert(opportunities[0])
--- a/week8_wip/agents/random_forest_agent.py
+++ b/week8_wip/agents/random_forest_agent.py
@@ -0,0 +1,18 @@
+# imports
+
+import os
+import re
+from typing import List
+from sentence_transformers import SentenceTransformer
+import joblib
+
+
+class RandomForestAgent:
+    """
+    Prices an item by embedding its description and feeding the embedding to
+    the model loaded from 'random_forest_model.pkl'.
+    """
+
+    def __init__(self):
+        """Load the sentence-embedding model and the saved prediction model."""
+        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+        self.model = joblib.load('random_forest_model.pkl')
+
+    def price(self, description: str) -> float:
+        """Return the model's prediction for `description`, with 0 as the lower bound."""
+        embedding = self.vectorizer.encode([description])
+        predictions = self.model.predict(embedding)
+        return max(0, predictions[0])
--- a/week8_wip/agents/scanner_agent.py
+++ b/week8_wip/agents/scanner_agent.py
@@ -0,0 +1,46 @@
+import os
+import json
+from openai import OpenAI
+from agents.deals import Deal, QualityDealSelection
+
+class ScannerAgent:
+    """
+    Fetches current deals and asks an OpenAI chat model to pick and summarise
+    the 5 with the most detailed descriptions and clearest prices.
+    """
+
+    MODEL = "gpt-4o-mini"
+
+    # System instruction, including the JSON format the reply must follow.
+    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
+    Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
+    Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
+    
+    {"quality_deals": [
+        {
+            "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
+            "price": 99.99,
+            "url": "the url as provided"
+        },
+        ...
+    ]}"""
+
+    # Start of the user message; the deal descriptions are appended to it in scan().
+    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
+    Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
+    Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
+    
+    Deals:
+    
+    """
+
+    def __init__(self):
+        """Create the OpenAI client."""
+        self.openai = OpenAI()
+
+    def scan(self) -> QualityDealSelection:
+        """
+        Fetch deals, send their descriptions to the model and return the
+        parsed selection from the first completion choice.
+        """
+        descriptions = [deal.describe() for deal in Deal.fetch()]
+        messages = [
+            {"role": "system", "content": self.SYSTEM_PROMPT},
+            {"role": "user", "content": self.USER_PROMPT_PREFIX + '\n\n'.join(descriptions)},
+        ]
+        completion = self.openai.beta.chat.completions.parse(
+            model=self.MODEL,
+            messages=messages,
+            response_format=QualityDealSelection,
+        )
+        return completion.choices[0].message.parsed
--- a/week8_wip/agents/specialist_agent.py
+++ b/week8_wip/agents/specialist_agent.py
@@ -0,0 +1,10 @@
+import modal
+
+class SpecialistAgent:
+    """Prices items through the "Pricer" class of the "pricer-service" Modal app."""
+
+    def __init__(self):
+        """Look up the remote Pricer class and keep an instance of it."""
+        pricer_cls = modal.Cls.lookup("pricer-service", "Pricer")
+        self.pricer = pricer_cls()
+
+    def price(self, description: str) -> float:
+        """Return the result of the remote `price` call for `description`."""
+        remote_price = self.pricer.price.remote
+        return remote_price(description)