Fixed Google Colab link in week 3 day 4, and latest week 8 updates

This commit is contained in:
Edward Donner
2024-09-27 08:35:09 -04:00
parent 95596c52f8
commit e02dca5058
18 changed files with 74561 additions and 858 deletions

View File

@@ -32,6 +32,7 @@ dependencies:
- plotly
- twilio
- duckdb
- feedparser
- pip:
- transformers
- sentence-transformers

84
week8_wip/agents/deals.py Normal file
View File

@@ -0,0 +1,84 @@
from pydantic import BaseModel
from typing import List
from bs4 import BeautifulSoup
import re
import feedparser
from tqdm import tqdm
import requests
import time

# DealNews RSS feeds to scan for candidate deals
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]


def extract(html_snippet):
    # Pull the plain-text description out of a DealNews summary snippet
    soup = BeautifulSoup(html_snippet, 'html.parser')
    snippet_div = soup.find('div', class_='snippet summary')
    if snippet_div:
        description = snippet_div.get_text(strip=True)
        description = BeautifulSoup(description, 'html.parser').get_text()
        description = re.sub('<[^<]+?>', '', description)
        result = description.strip()
    else:
        result = html_snippet
    return result.replace('\n', ' ')


class Deal:
    category: str
    title: str
    summary: str
    url: str
    item_id: int
    details: str
    features: str

    def __init__(self, entry, id):
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        self.item_id = id
        stuff = requests.get(self.url).content
        soup = BeautifulSoup(stuff, 'html.parser')
        content = soup.find('div', class_='content-section').get_text()
        content = content.replace('\nmore', '').replace('\n', ' ')
        if "Features" in content:
            # Split only on the first occurrence in case "Features" appears more than once
            self.details, self.features = content.split("Features", 1)
        else:
            self.details = content
            self.features = ""

    def __repr__(self):
        return f"<{self.title}>"

    def describe(self):
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls):
        deals = []
        item_id = 1001
        for feed_url in tqdm(feeds):
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                deals.append(cls(entry, item_id))
                item_id += 1
            time.sleep(1)
        return deals


class QualityDeal(BaseModel):
    product_description: str
    price: float
    url: str


class QualityDealSelection(BaseModel):
    quality_deals: List[QualityDeal]


class Opportunity(BaseModel):
    quality_deal: QualityDeal
    estimate: float
    discount: float
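
For orientation, here is a hedged usage sketch of this module (the day 3 notebook later in this commit does the same thing); it assumes `agents.deals` is on the import path and the RSS feeds respond:

```python
# Hypothetical usage sketch, not part of the commit; needs network access to the DealNews feeds
from agents.deals import Deal

deals = Deal.fetch()            # up to 10 entries per feed, so up to 50 deals
print(len(deals))
print(deals[0].describe())      # title, details, features and URL of the first deal
```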

View File

@@ -0,0 +1,29 @@
import pandas as pd
from sklearn.linear_model import LinearRegression
import joblib

from agents.specialist_agent import SpecialistAgent
from agents.frontier_agent import FrontierAgent
from agents.random_forest_agent import RandomForestAgent


class EnsembleAgent:

    def __init__(self, collection):
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')

    def price(self, description):
        specialist = self.specialist.price(description)
        frontier = self.frontier.price(description)
        random_forest = self.random_forest.price(description)
        X = pd.DataFrame({
            'Specialist': [specialist],
            'Frontier': [frontier],
            'RandomForest': [random_forest],
            'Min': [min(specialist, frontier, random_forest)],
            'Max': [max(specialist, frontier, random_forest)],
        })
        y = self.model.predict(X)
        return y[0]
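
`ensemble_model.pkl` is loaded above but not created anywhere in this commit. A hedged sketch of one way to produce a compatible file, using placeholder numbers where the real per-item predictions from the three agents would go, and matching the column order used in `price`:

```python
# Hypothetical training sketch; real predictions come from the Specialist/Frontier/RandomForest agents
import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression

specialist = [120.0, 45.0, 300.0]   # placeholder per-item predictions
frontier = [110.0, 50.0, 280.0]
forest = [125.0, 40.0, 310.0]
truths = [115.0, 48.0, 295.0]       # placeholder ground-truth prices

X = pd.DataFrame({
    'Specialist': specialist,
    'Frontier': frontier,
    'RandomForest': forest,
    'Min': [min(a, b, c) for a, b, c in zip(specialist, frontier, forest)],
    'Max': [max(a, b, c) for a, b, c in zip(specialist, frontier, forest)],
})
lr = LinearRegression().fit(X, pd.Series(truths))
joblib.dump(lr, 'ensemble_model.pkl')   # filename expected by EnsembleAgent
```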

View File

@@ -0,0 +1,63 @@
# imports

import os
import re
import math
import json
from typing import List
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from items import Item
from testing import Tester


class FrontierAgent:

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        self.openai = OpenAI()
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def make_context(self, similars: List[str], prices: List[float]):
        message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        for similar, price in zip(similars, prices):
            message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        return message

    def messages_for(self, description: str, similars: List[str], prices: List[float]):
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"}
        ]

    def find_similars(self, description: str):
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        documents = results['documents'][0][:]
        prices = [m['price'] for m in results['metadatas'][0][:]]
        return documents, prices

    def get_price(self, s) -> float:
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        documents, prices = self.find_similars(description)
        response = self.openai.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        return self.get_price(reply)
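
A minimal usage sketch, following the day 2.3 notebook later in this commit (assumes the `products_vectorstore` Chroma store already exists and `OPENAI_API_KEY` is set):

```python
# Hypothetical usage sketch; DB path and collection name follow the notebooks in this commit
import chromadb
from agents.frontier_agent import FrontierAgent

client = chromadb.PersistentClient(path="products_vectorstore")
collection = client.get_or_create_collection('products')

agent = FrontierAgent(collection)
print(agent.price("Quadcast HyperX condenser mic for high quality podcasting"))
```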

View File

@@ -0,0 +1,28 @@
import os
from twilio.rest import Client

from agents.deals import Opportunity


class MessagingAgent:

    def __init__(self):
        account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
        auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
        self.me_from = 'whatsapp:+14155238886'
        self.me_to = f"whatsapp:+1{os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')}"
        self.client = Client(account_sid, auth_token)

    def message(self, text):
        message = self.client.messages.create(
            from_=self.me_from,
            body=text,
            to=self.me_to
        )

    def alert(self, opportunity: Opportunity):
        text = f"Deal! Price=${opportunity.quality_deal.price:.2f}, "
        text += f"Estimate=${opportunity.estimate:.2f} :"
        text += opportunity.quality_deal.product_description[:10] + '... '
        text += opportunity.quality_deal.url
        self.message(text)
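
A minimal usage sketch, mirroring the day 4 notebook below (assumes the Twilio sandbox credentials and `MY_PHONE_NUMBER` are set in the environment):

```python
# Hypothetical usage sketch; requires valid Twilio sandbox credentials
from agents.messaging_agent import MessagingAgent

agent = MessagingAgent()
agent.message("Hi!!")   # sends a WhatsApp message to your own number via the sandbox
```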

View File

@@ -0,0 +1,24 @@
from agents.deals import Deal, QualityDealSelection, Opportunity
from agents.scanner_agent import ScannerAgent
from agents.ensemble_agent import EnsembleAgent
from agents.messaging_agent import MessagingAgent


class PlanningAgent:

    def __init__(self, collection):
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()

    def plan(self):
        opportunities = []
        deal_selection = self.scanner.scan()
        for deal in deal_selection.quality_deals[:5]:
            estimate = self.ensemble.price(deal.product_description)
            # Opportunity is a pydantic model, so its fields are passed by keyword
            opportunities.append(Opportunity(quality_deal=deal, estimate=estimate, discount=estimate - deal.price))
        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
        print(opportunities)
        if opportunities and opportunities[0].discount > 50:
            self.messenger.alert(opportunities[0])
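
A hedged sketch of wiring the planner together, following the day 4 notebook in this commit (assumes the Chroma store, the pickled models, and all API credentials are in place):

```python
# Hypothetical usage sketch; mirrors the cells at the end of day4.ipynb
import chromadb
from agents.planning_agent import PlanningAgent

DB = "products_vectorstore"
client = chromadb.PersistentClient(path=DB)
collection = client.get_or_create_collection('products')

planner = PlanningAgent(collection)
planner.plan()   # scans deals, prices them, and alerts on the best discount over $50
```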

View File

@@ -0,0 +1,18 @@
# imports

import os
import re
from typing import List
from sentence_transformers import SentenceTransformer
import joblib


class RandomForestAgent:

    def __init__(self):
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')

    def price(self, description: str) -> float:
        vector = self.vectorizer.encode([description])
        return max(0, self.model.predict(vector)[0])
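
`random_forest_model.pkl` is loaded here but its creation is not shown in this file. A hedged sketch of producing a compatible model, mirroring the regressor settings used in the notebook later in this commit (assumes the `products_vectorstore` Chroma store is populated with prices in the metadata):

```python
# Hedged training sketch; the save-to-pkl step is an assumption to match RandomForestAgent
import joblib
import numpy as np
import chromadb
from sklearn.ensemble import RandomForestRegressor

client = chromadb.PersistentClient(path="products_vectorstore")
collection = client.get_or_create_collection('products')
result = collection.get(include=['embeddings', 'metadatas'])

vectors = np.array(result['embeddings'])
prices = [m['price'] for m in result['metadatas']]

rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=8)
rf_model.fit(vectors, prices)
joblib.dump(rf_model, 'random_forest_model.pkl')   # filename expected by RandomForestAgent
```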

View File

@@ -0,0 +1,46 @@
import os
import json
from openai import OpenAI

from agents.deals import Deal, QualityDealSelection


class ScannerAgent:

    MODEL = "gpt-4o-mini"

    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.

{"quality_deals": [
    {
        "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragraph of text for each item you choose.",
        "price": 99.99,
        "url": "the url as provided"
    },
    ...
]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.

Deals:

"""

    def __init__(self):
        self.openai = OpenAI()

    def scan(self) -> QualityDealSelection:
        deals = Deal.fetch()
        user_prompt = self.USER_PROMPT_PREFIX + '\n\n'.join([deal.describe() for deal in deals])
        completion = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            response_format=QualityDealSelection
        )
        result = completion.choices[0].message.parsed
        return result
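
A minimal usage sketch (assumes `OPENAI_API_KEY` is set and the RSS feeds respond); the structured-output call parses the reply straight into the `QualityDealSelection` pydantic model:

```python
# Hypothetical usage sketch, not part of the commit
from agents.scanner_agent import ScannerAgent

selection = ScannerAgent().scan()          # QualityDealSelection with up to 5 deals
for deal in selection.quality_deals:
    print(f"{deal.price:>8.2f}  {deal.url}")
```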

View File

@@ -0,0 +1,10 @@
import modal


class SpecialistAgent:

    def __init__(self):
        Pricer = modal.Cls.lookup("pricer-service", "Pricer")
        self.pricer = Pricer()

    def price(self, description: str) -> float:
        return self.pricer.price.remote(description)
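
A minimal usage sketch (assumes the `pricer-service` app has already been deployed to Modal, as in the day 2.3 notebook where the same call returns a price):

```python
# Hypothetical usage sketch; requires a deployed Modal "pricer-service" app
from agents.specialist_agent import SpecialistAgent

agent = SpecialistAgent()
print(agent.price("Quadcast HyperX condenser mic for high quality podcasting"))
```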

View File

@@ -0,0 +1,566 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "fbcdfea8-7241-46d7-a771-c0381a3e7063",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import re\n",
"import math\n",
"import json\n",
"from tqdm import tqdm\n",
"import random\n",
"from dotenv import load_dotenv\n",
"from huggingface_hub import login\n",
"import numpy as np\n",
"import pickle\n",
"from openai import OpenAI\n",
"from sentence_transformers import SentenceTransformer\n",
"from datasets import load_dataset\n",
"import chromadb\n",
"from items import Item\n",
"from testing import Tester\n",
"from agents.pricer_agent import price\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error, r2_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e6e88bd1-f89c-4b98-92fa-aa4bc1575bca",
"metadata": {},
"outputs": [],
"source": [
"# CONSTANTS\n",
"\n",
"QUESTION = \"How much does this cost to the nearest dollar?\\n\\n\"\n",
"DB = \"products_vectorstore\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "98666e73-938e-469d-8987-e6e55ba5e034",
"metadata": {},
"outputs": [],
"source": [
"# environment\n",
"\n",
"load_dotenv()\n",
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9a25a5cf-8f6c-4b5d-ad98-fdd096f5adf8",
"metadata": {},
"outputs": [],
"source": [
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dc696493-0b6f-48aa-9fa8-b1ae0ecaf3cd",
"metadata": {},
"outputs": [],
"source": [
"# Load in the test pickle file:\n",
"\n",
"with open('test.pkl', 'rb') as file:\n",
" test = pickle.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "33d38a06-0c0d-4e96-94d1-35ee183416ce",
"metadata": {},
"outputs": [],
"source": [
"def make_context(similars, prices):\n",
" message = \"To provide some context, here are some other items that might be similar to the item you need to estimate.\\n\\n\"\n",
" for similar, price in zip(similars, prices):\n",
" message += f\"Potentially related product:\\n{similar}\\nPrice is ${price:.2f}\\n\\n\"\n",
" return message"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61f203b7-63b6-48ed-869b-e393b5bfcad3",
"metadata": {},
"outputs": [],
"source": [
"def messages_for(item, similars, prices):\n",
" system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
" user_prompt = make_context(similars, prices)\n",
" user_prompt += \"And now the question for you:\\n\\n\"\n",
" user_prompt += item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
" return [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": user_prompt},\n",
" {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b26f405d-6e1f-4caa-b97f-1f62cd9d1ebc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "d26a1104-cd11-4361-ab25-85fb576e0582",
"metadata": {},
"outputs": [],
"source": [
"client = chromadb.PersistentClient(path=DB)\n",
"collection = client.get_or_create_collection('products')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e339760-96d8-4485-bec7-43fadcd30c4d",
"metadata": {},
"outputs": [],
"source": [
"def description(item):\n",
" text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
" return text.split(\"\\n\\nPrice is $\")[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f759bd2-7a7e-4c1a-80a0-e12470feca89",
"metadata": {},
"outputs": [],
"source": [
"model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e44dbd25-fb95-4b6b-bbbb-8da5fc817105",
"metadata": {},
"outputs": [],
"source": [
"def vector(item):\n",
" return model.encode([description(item)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffd5ee47-db5d-4263-b0d9-80d568c91341",
"metadata": {},
"outputs": [],
"source": [
"def find_similars(item):\n",
" results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)\n",
" documents = results['documents'][0][:]\n",
" prices = [m['price'] for m in results['metadatas'][0][:]]\n",
" return documents, prices"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d11f1c8d-7480-4d64-a274-b030d701f1b8",
"metadata": {},
"outputs": [],
"source": [
"def get_price(s):\n",
" s = s.replace('$','').replace(',','')\n",
" match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
" return float(match.group()) if match else 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a919cf7d-b3d3-4968-8c96-54a0da0b0219",
"metadata": {},
"outputs": [],
"source": [
"# The function for gpt-4o-mini\n",
"\n",
"def gpt_4o_mini_rag(item):\n",
" documents, prices = find_similars(item)\n",
" response = openai.chat.completions.create(\n",
" model=\"gpt-4o-mini\", \n",
" messages=messages_for(item, documents, prices),\n",
" seed=42,\n",
" max_tokens=5\n",
" )\n",
" reply = response.choices[0].message.content\n",
" return get_price(reply)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b918cfc-76c1-442a-8caa-bec500cd504b",
"metadata": {},
"outputs": [],
"source": [
"gpt_4o_mini_rag(test[1000])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c92cfc0b-b36d-456f-94cc-fe3f315cc25e",
"metadata": {},
"outputs": [],
"source": [
"test[1000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6d5deb3-6a2a-4484-872c-37176c5e1f07",
"metadata": {},
"outputs": [],
"source": [
"def proprietary(item):\n",
" text = item.prompt.split(\"to the nearest dollar?\\n\\n\")[1].split(\"\\n\\nPrice is $\")[0]\n",
" return price(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bacdf607-37b9-4997-adb1-d63abfb645b1",
"metadata": {},
"outputs": [],
"source": [
"print(proprietary(test[1]))\n",
"print(gpt_4o_mini_rag(test[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b35532e7-098a-4ab9-a8f7-8f101b437181",
"metadata": {},
"outputs": [],
"source": [
"truths = []\n",
"proprietaries = []\n",
"rags = []\n",
"for i in tqdm(range(1000,1250)):\n",
" item = test[i]\n",
" truths.append(item.price)\n",
" proprietaries.append(proprietary(item))\n",
" rags.append(gpt_4o_mini_rag(item))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6ae54c7-6e8e-4333-b075-b59978fed560",
"metadata": {},
"outputs": [],
"source": [
"mins = [min(p,r) for p,r in zip(proprietaries, rags)]\n",
"maxes = [max(p,r) for p,r in zip(proprietaries, rags)]\n",
"\n",
"X = pd.DataFrame({\n",
" 'Proprietary': proprietaries,\n",
" 'RAG': rags,\n",
" 'Min': mins,\n",
" 'Max': maxes,\n",
"})\n",
"\n",
"# Convert y to a Series\n",
"y = pd.Series(truths)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e68684ed-d029-4d95-bb13-eead19b20e49",
"metadata": {},
"outputs": [],
"source": [
"# Train a Linear Regression\n",
"np.random.seed(42)\n",
"\n",
"lr = LinearRegression()\n",
"lr.fit(X, y)\n",
"\n",
"feature_columns = [\"Proprietary\", \"RAG\", \"Min\", \"Max\"]\n",
"\n",
"for feature, coef in zip(feature_columns, lr.coef_):\n",
" print(f\"{feature}: {coef:.2f}\")\n",
"print(f\"Intercept={lr.intercept_:.2f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28530362-97b8-42a0-bf89-967539b6f170",
"metadata": {},
"outputs": [],
"source": [
"def ensemble(item):\n",
" prop = proprietary(item)\n",
" rag = gpt_4o_mini_rag(item)\n",
" Xt = pd.DataFrame({\n",
" 'Proprietary': [prop],\n",
" 'RAG': [rag],\n",
" 'Min': [min(prop,rag)],\n",
" 'Max': [max(prop,rag)],\n",
" })\n",
" yt = lr.predict(Xt)\n",
" return yt[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08021c05-340b-4ee2-9d11-4b280766976f",
"metadata": {},
"outputs": [],
"source": [
"ensemble(test[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8308c74-546f-4fc0-ada4-1974addacfd1",
"metadata": {},
"outputs": [],
"source": [
"test[0].price"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80792910-c59f-4d96-aa53-683464a8e60c",
"metadata": {},
"outputs": [],
"source": [
"Tester.test(ensemble, test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0c41043-2049-4883-947f-2aad2f6954c2",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
"vectors = np.array(result['embeddings'])\n",
"documents = result['documents']\n",
"prices = [metadata['price'] for metadata in result['metadatas']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9c3276f-ae01-478d-bb27-dc73b567b41a",
"metadata": {},
"outputs": [],
"source": [
"rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=8)\n",
"rf_model.fit(vectors, prices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e8f70cd-4147-40c6-9861-a3513b7e5499",
"metadata": {},
"outputs": [],
"source": [
"def new_rf(item):\n",
" text = item.prompt.split(\"to the nearest dollar?\\n\\n\")[1].split(\"\\n\\nPrice is $\")[0]\n",
" vector = model.encode([text])\n",
" return max(0, rf_model.predict(vector)[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2e3340f-7ed4-47eb-a5a9-dff4c0353f58",
"metadata": {},
"outputs": [],
"source": [
"new_rf(test[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f91c903b-8db1-4374-807e-3a8ce282ef30",
"metadata": {},
"outputs": [],
"source": [
"Tester.test(new_rf, test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c8e23c5-1ed3-4bd1-a3c0-129d4712c93a",
"metadata": {},
"outputs": [],
"source": [
"forests = []\n",
"for i in tqdm(range(1000,1250)):\n",
" item = test[i]\n",
" forests.append(new_rf(item))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e2eca63-8230-4904-9a79-7e779747479e",
"metadata": {},
"outputs": [],
"source": [
"truths2 = []\n",
"proprietaries2 = []\n",
"rags2 = []\n",
"forests2 = []\n",
"for i in tqdm(range(1000,2000)):\n",
" item = test[i]\n",
" truths2.append(item.price)\n",
" proprietaries2.append(proprietary(item))\n",
" rags2.append(gpt_4o_mini_rag(item))\n",
" forests2.append(new_rf(item))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a3e057f-05c5-4f8f-8b3b-0afdfccc1412",
"metadata": {},
"outputs": [],
"source": [
"mins2 = [min(p,r,f) for p,r,f in zip(proprietaries2, rags2, forests2)]\n",
"maxes2 = [max(p,r,f) for p,r,f in zip(proprietaries2, rags2, forests2)]\n",
"\n",
"\n",
"\n",
"X2 = pd.DataFrame({\n",
" 'Proprietary': proprietaries2,\n",
" 'RAG': rags2,\n",
" 'Forest': forests2,\n",
" 'Min': mins2,\n",
" 'Max': maxes2,\n",
"})\n",
"\n",
"# Convert y to a Series\n",
"y2 = pd.Series(truths2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ae62175-b955-428e-b077-705c49ee71bd",
"metadata": {},
"outputs": [],
"source": [
"# Train a Linear Regression\n",
"np.random.seed(42)\n",
"\n",
"lr2 = LinearRegression()\n",
"lr2.fit(X2, y2)\n",
"\n",
"feature_columns = X2.columns.tolist()\n",
"\n",
"for feature, coef in zip(feature_columns, lr2.coef_):\n",
" print(f\"{feature}: {coef:.2f}\")\n",
"print(f\"Intercept={lr.intercept_:.2f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "214a3831-c464-4218-a349-534b6bda7f12",
"metadata": {},
"outputs": [],
"source": [
"def ensemble2(item):\n",
" prop = proprietary(item)\n",
" rag = gpt_4o_mini_rag(item)\n",
" r_f = new_rf(item)\n",
" Xt2 = pd.DataFrame({\n",
" 'Proprietary': [prop],\n",
" 'RAG': [rag],\n",
" 'Forest': [r_f],\n",
" 'Min': [min(prop,rag, r_f)],\n",
" 'Max': [max(prop,rag, r_f)],\n",
" })\n",
" yt2 = lr.predict(Xt2)\n",
" return yt2[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b234cb68-af68-4475-ae18-8892aac6b74e",
"metadata": {},
"outputs": [],
"source": [
"Tester.test(ensemble2, test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10a7275f-1aa9-4446-9100-a7a0ba0215f2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,9 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e426cd04-c053-43e8-b505-63cee7956a53",
"metadata": {},
"source": [
"May need to update environment if cloned git after Sep 26\n",
"```\n",
"git pull\n",
"conda env update --f environment.yml --prune\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "56297249-4a8c-4e67-b8c3-a0d8652c104e",
"execution_count": null,
"id": "bc0e1c1c-be6a-4395-bbbd-eeafc9330d7e",
"metadata": {},
"outputs": [],
"source": [
@@ -263,10 +275,41 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "ba9aedca-6a7b-4d30-9f64-59d76f76fb6d",
"metadata": {},
"outputs": [],
"source": [
"from agents.specialist_agent import price"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fe5843e5-e958-4a65-8326-8f5b4686de7f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"133.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"price(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5a3181b-1310-4102-8d7d-52caf4c00538",
"metadata": {},
"outputs": [],
"source": []
}
],

View File

@@ -210,98 +210,6 @@
"CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']\n",
"COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4cf1c9a-1ced-48d4-974c-3c850905034e",
"metadata": {},
"outputs": [],
"source": [
"# Prework\n",
"\n",
"vectors_np = np.array(vectors)\n",
"colors = [COLORS[CATEGORIES.index(t)] for t in categories]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c6718b3-e0fd-4319-a1b5-d9d34d6b1dd9",
"metadata": {},
"outputs": [],
"source": [
"# We humans find it easier to visalize things in 2D!\n",
"# Reduce the dimensionality of the vectors to 2D using t-SNE\n",
"# (t-distributed stochastic neighbor embedding)\n",
"\n",
"tsne = TSNE(n_components=2, random_state=42)\n",
"reduced_vectors = tsne.fit_transform(vectors_np)\n",
"\n",
"# Create the 2D scatter plot\n",
"fig = go.Figure(data=[go.Scatter(\n",
" x=reduced_vectors[:, 0],\n",
" y=reduced_vectors[:, 1],\n",
" mode='markers',\n",
" marker=dict(size=3, color=colors, opacity=0.8),\n",
" text=[f\"Category: {c}<br>Text: {d[:100]}...\" for c, d in zip(categories, descriptions)],\n",
" hoverinfo='text'\n",
")])\n",
"\n",
"fig.update_layout(\n",
" title='2D Chroma Vector Store Visualization',\n",
" scene=dict(xaxis_title='x',yaxis_title='y'),\n",
" width=1200,\n",
" height=800,\n",
" margin=dict(r=20, b=10, l=10, t=40)\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c54df150-c8d8-4bc3-8877-6759691eeb42",
"metadata": {},
"outputs": [],
"source": [
"# Let's try 3D!\n",
"\n",
"tsne = TSNE(n_components=3, random_state=42)\n",
"reduced_vectors = tsne.fit_transform(vectors_np)\n",
"\n",
"# Create the 3D scatter plot\n",
"fig = go.Figure(data=[go.Scatter3d(\n",
" x=reduced_vectors[:, 0],\n",
" y=reduced_vectors[:, 1],\n",
" z=reduced_vectors[:, 2],\n",
" mode='markers',\n",
" marker=dict(size=3, color=colors, opacity=0.7),\n",
" text=[f\"Category: {c}<br>Text: {d[:100]}...\" for c, d in zip(categories, descriptions)],\n",
" hoverinfo='text'\n",
")])\n",
"\n",
"fig.update_layout(\n",
" title='3D Chroma Vector Store Visualization',\n",
" scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),\n",
" width=1200,\n",
" height=800,\n",
" margin=dict(r=20, b=10, l=10, t=40)\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8fb2a63-24c5-4dce-9e63-aa208272f82d",
"metadata": {},
"outputs": [],
"source": [
"def "
]
}
],
"metadata": {

31093
week8_wip/day2.1.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 1,
"id": "fbcdfea8-7241-46d7-a771-c0381a3e7063",
"metadata": {},
"outputs": [],
@@ -20,7 +20,6 @@
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pickle\n",
"from collections import Counter\n",
"from openai import OpenAI\n",
"from sentence_transformers import SentenceTransformer\n",
"from datasets import load_dataset\n",
@@ -60,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 4,
"id": "9a25a5cf-8f6c-4b5d-ad98-fdd096f5adf8",
"metadata": {},
"outputs": [],
@@ -124,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 5,
"id": "d26a1104-cd11-4361-ab25-85fb576e0582",
"metadata": {},
"outputs": [],
@@ -587,10 +586,101 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "e6d5deb3-6a2a-4484-872c-37176c5e1f07",
"metadata": {},
"outputs": [],
"source": [
"from agents.frontier_agent import FrontierAgent"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "56e8dd5d-ed36-49d8-95f7-dc82e548255b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ed/miniconda3/envs/llms/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n"
]
}
],
"source": [
"agent = FrontierAgent(collection)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "980dd126-f675-4499-8817-0cc0bb73e247",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"139.99"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.price(\"Quadcast HyperX condenser mic for high quality podcasting\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "66c18a06-d0f1-4ec9-8aff-ec3ca294dd09",
"metadata": {},
"outputs": [],
"source": [
"from agents.specialist_agent import SpecialistAgent"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ba672fb4-2c3e-42ee-9ea0-21bfcfc5260c",
"metadata": {},
"outputs": [],
"source": [
"agent2 = SpecialistAgent()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "a5a97004-95b4-46ea-b12d-a4ead22fcb2a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"189.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent2.price(\"Quadcast HyperX condenser mic for high quality podcasting\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26d5ddc6-baa6-4760-a430-05671847ac47",
"metadata": {},
"outputs": [],
"source": []
}
],

File diff suppressed because one or more lines are too long

202
week8_wip/day3.ipynb Normal file
View File

@@ -0,0 +1,202 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d3763a79-8a5a-4300-8de4-93e85475af10",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"from agents.deals import Deal, QualityDealSelection"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6469e32-16c3-4443-9475-ade710ef6933",
"metadata": {},
"outputs": [],
"source": [
"# Initialize and constants\n",
"\n",
"load_dotenv()\n",
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
"MODEL = 'gpt-4o-mini'\n",
"openai = OpenAI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "afece9db-8cd4-46be-ac57-0b472e84da7d",
"metadata": {},
"outputs": [],
"source": [
"deals = Deal.fetch()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cd15c4d-eb44-4601-bf0c-f945c1d8e3ec",
"metadata": {},
"outputs": [],
"source": [
"len(deals)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4259f30a-6455-49ed-8863-2f9ddd4776cb",
"metadata": {},
"outputs": [],
"source": [
"deals[44].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8100e5ac-38f5-40c1-a712-08ae12c85038",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"\"\"You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.\n",
"Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.\n",
"Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.\n",
"\n",
"{\"quality_deals\": [\n",
" {\n",
" \"product_description\": \"Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.\",\n",
" \"price\": 99.99,\n",
" \"url\": \"the url as provided\"\n",
" },\n",
" ...\n",
"]}\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4bca170-af71-40c9-9597-1d72980c74d8",
"metadata": {},
"outputs": [],
"source": [
"user_prompt = \"\"\"Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
"Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
"Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
"\n",
"Deals:\n",
"\n",
"\"\"\"\n",
"user_prompt += '\\n\\n'.join([deal.describe() for deal in deals])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "020947a6-561b-417b-98a0-a085e31d2ce3",
"metadata": {},
"outputs": [],
"source": [
"print(user_prompt[:2000])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7de46f74-868c-4127-8a68-cf2da7d600bb",
"metadata": {},
"outputs": [],
"source": [
"def get_recommendations():\n",
" completion = openai.beta.chat.completions.parse(\n",
" model=\"gpt-4o-mini\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": user_prompt}\n",
" ],\n",
" response_format=QualityDealSelection\n",
" )\n",
" result = completion.choices[0].message.parsed\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c06270d-8c17-4d5a-9cfe-b6cefe788d5e",
"metadata": {},
"outputs": [],
"source": [
"result = get_recommendations()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5554a0a-ae40-4684-ad3e-faa3d22e030c",
"metadata": {},
"outputs": [],
"source": [
"result.quality_deals[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bdc57fb-7497-47af-a643-6ba5a21cc17e",
"metadata": {},
"outputs": [],
"source": [
"from agents.scanner_agent import scan"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "132278bc-217a-43a6-b6c4-724140c6a225",
"metadata": {},
"outputs": [],
"source": [
"scan()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e1d013a-c930-4dad-901b-41433379e14b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

151
week8_wip/day4.ipynb Normal file
View File

@@ -0,0 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "80d683d9-9e92-44ae-af87-a413ca84db21",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from twilio.rest import Client\n",
"from dotenv import load_dotenv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ba769cc-5301-4810-b01f-cab584cfb3b3",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()\n",
"os.environ['TWILIO_ACCOUNT_SID'] = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')\n",
"os.environ['TWILIO_AUTH_TOKEN'] = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')\n",
"os.environ['MY_PHONE_NUMBER'] = os.getenv('MY_PHONE_NUMBER', 'your-phone-if-not-using-env')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "761e6460-d201-4f69-ba31-a641a059e47d",
"metadata": {},
"outputs": [],
"source": [
"ME_FROM = 'whatsapp:+14155238886'\n",
"ME_TO = f\"whatsapp:+1{os.environ['MY_PHONE_NUMBER']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f77f8b08-6c92-47e2-9dd0-3ddaf01beb07",
"metadata": {},
"outputs": [],
"source": [
"account_sid = os.environ['TWILIO_ACCOUNT_SID']\n",
"auth_token = os.environ['TWILIO_AUTH_TOKEN']\n",
"client = Client(account_sid, auth_token)\n",
"\n",
"message = client.messages.create(\n",
" from_=ME_FROM,\n",
" body='hello, me!',\n",
" to=ME_TO\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6794a7de-352f-46d2-8451-ff79c9654b31",
"metadata": {},
"outputs": [],
"source": [
"from agents.messaging_agent import MessagingAgent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e05cc427-3d2c-4792-ade1-d356f95a82a9",
"metadata": {},
"outputs": [],
"source": [
"agent = MessagingAgent()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ec518f5-dae4-44b1-a185-d7eaf853ec00",
"metadata": {},
"outputs": [],
"source": [
"agent.message(\"Hi!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57b3a014-0b15-425a-a29b-6fefc5006dee",
"metadata": {},
"outputs": [],
"source": [
"import chromadb\n",
"DB = \"products_vectorstore\"\n",
"client = chromadb.PersistentClient(path=DB)\n",
"collection = client.get_or_create_collection('products')\n",
"from agents.planning_agent import PlanningAgent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5c31c39-e357-446e-9cec-b4775c298941",
"metadata": {},
"outputs": [],
"source": [
"planner = PlanningAgent(collection)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9ac771b-ea12-41c0-a7ce-05f12e27ad9e",
"metadata": {},
"outputs": [],
"source": [
"planner.plan()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70200a3c-64fb-4c34-bdd8-57aaf009ec60",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}