Fixed Google Colab link in week 3 day 4, and latest week 8 updates
This commit is contained in:
@@ -32,6 +32,7 @@ dependencies:
|
||||
- plotly
|
||||
- twilio
|
||||
- duckdb
|
||||
- feedparser
|
||||
- pip:
|
||||
- transformers
|
||||
- sentence-transformers
|
||||
|
||||
84
week8_wip/agents/deals.py
Normal file
84
week8_wip/agents/deals.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import feedparser
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
import time
|
||||
|
||||
# DealNews RSS feed URLs; Deal.fetch() iterates over this list.
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]
|
||||
|
||||
def extract(html_snippet):
    """
    Return the plain-text summary held in an HTML snippet.

    When the snippet contains a <div class="snippet summary">, the text of
    that div is returned with markup removed. Otherwise the snippet is
    returned as received. In both cases newlines are replaced by spaces.
    """
    parsed = BeautifulSoup(html_snippet, 'html.parser')
    summary_div = parsed.find('div', class_='snippet summary')
    if not summary_div:
        # No summary div found: fall back to the snippet itself.
        return html_snippet.replace('\n', ' ')

    text = summary_div.get_text(strip=True)
    # Second parse plus regex: remove any tag-like text that is still present
    # after the first extraction.
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = re.sub('<[^<]+?>', '', text).strip()
    return text.replace('\n', ' ')
|
||||
|
||||
class Deal:
    """
    A deal built from one RSS feed entry.

    Constructing a Deal performs an HTTP GET of the deal's page in order to
    collect the longer "details" and "features" text.
    """
    # NOTE: category is declared here but is not assigned by __init__.
    category: str
    title: str
    summary: str
    url: str
    item_id: int
    details: str
    features: str

    def __init__(self, entry, id):
        """
        Populate the deal from a feed entry and download its page.

        entry: feed entry providing 'title', 'summary' and 'links'
               (the first link's 'href' is used as the deal URL)
        id: value stored as item_id
        """
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        self.item_id = id
        page = requests.get(self.url).content
        soup = BeautifulSoup(page, 'html.parser')
        section = soup.find('div', class_='content-section')
        # find() returns None when the div is absent; use empty text instead of
        # raising AttributeError so one odd page does not abort a whole fetch.
        content = section.get_text() if section is not None else ""
        content = content.replace('\nmore', '').replace('\n', ' ')
        self.details, self.features = self._split_content(content)

    @staticmethod
    def _split_content(content):
        """
        Split page text into (details, features) at the first "Features".

        Only the first occurrence is used as the separator, so text that
        mentions "Features" several times still yields exactly two parts.
        Without the marker, features is the empty string.
        """
        details, _, features = content.partition("Features")
        return details, features

    def __repr__(self):
        """Return the title wrapped in angle brackets."""
        return f"<{self.title}>"

    def describe(self):
        """Return a multi-line description: title, details, features and URL."""
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls):
        """
        Build Deal objects from the first 10 entries of every feed in `feeds`.

        Item ids are assigned sequentially starting at 1001.
        """
        deals = []
        item_id = 1001
        for feed_url in tqdm(feeds):
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                deals.append(cls(entry, item_id))
                item_id += 1
                # Each Deal() downloads a page; pause between downloads.
                time.sleep(1)
        return deals
|
||||
|
||||
# One deal as returned inside a QualityDealSelection.
# (Documented with comments rather than a docstring so the model's generated
# schema is left untouched.)
class QualityDeal(BaseModel):
    product_description: str  # text describing the product
    price: float              # price of the deal
    url: str                  # link to the deal
|
||||
|
||||
# Container for a list of QualityDeal objects; ScannerAgent.scan() uses this
# model as the response format of its completion request.
class QualityDealSelection(BaseModel):
    quality_deals: List[QualityDeal]
|
||||
|
||||
# A deal paired with a price estimate and the resulting discount figure.
class Opportunity(BaseModel):
    quality_deal: QualityDeal  # the deal being assessed
    estimate: float            # estimated price of the product
    discount: float            # PlanningAgent stores estimate minus deal price here
|
||||
29
week8_wip/agents/ensemble_agent.py
Normal file
29
week8_wip/agents/ensemble_agent.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import joblib
|
||||
|
||||
from agents.specialist_agent import SpecialistAgent
|
||||
from agents.frontier_agent import FrontierAgent
|
||||
from agents.random_forest_agent import RandomForestAgent
|
||||
|
||||
class EnsembleAgent:
    """
    Blends the specialist, frontier and random-forest price estimates using
    a model loaded from 'ensemble_model.pkl'.
    """

    def __init__(self, collection):
        """
        Create the three underlying agents and load the ensemble model.

        collection: handed on to FrontierAgent.
        """
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')

    def price(self, description):
        """Return the ensemble model's prediction for a product description."""
        # Evaluated in this order: specialist, frontier, random forest.
        estimates = {
            'Specialist': self.specialist.price(description),
            'Frontier': self.frontier.price(description),
            'RandomForest': self.random_forest.price(description),
        }
        lowest = min(estimates.values())
        highest = max(estimates.values())

        # One-row frame: the three estimates followed by their min and max.
        row = {name: [value] for name, value in estimates.items()}
        row['Min'] = [lowest]
        row['Max'] = [highest]
        features = pd.DataFrame(row)
        return self.model.predict(features)[0]
|
||||
63
week8_wip/agents/frontier_agent.py
Normal file
63
week8_wip/agents/frontier_agent.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
import math
|
||||
import json
|
||||
from typing import List
|
||||
from openai import OpenAI
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from datasets import load_dataset
|
||||
import chromadb
|
||||
from items import Item
|
||||
from testing import Tester
|
||||
|
||||
class FrontierAgent:
    """
    Estimates a product's price by asking an OpenAI chat model, giving it
    similar products (looked up in a vector collection) as context.
    """

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        Create the OpenAI client and the sentence-embedding model.

        collection: vector store queried by find_similars(); it must offer
                    query(query_embeddings=..., n_results=...).
        """
        self.openai = OpenAI()
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def make_context(self, similars: List[str], prices: List[float]):
        """Return the context text listing each similar product with its price."""
        intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        entries = [
            f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
            for similar, price in zip(similars, prices)
        ]
        return intro + "".join(entries)

    def messages_for(self, description: str, similars: List[str], prices: List[float]):
        """
        Build the chat messages: system instruction, user prompt (context plus
        the question and description) and an assistant message "Price is $".
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"},
        ]

    def find_similars(self, description: str):
        """
        Query the collection with the description's embedding (5 results).

        Returns (documents, prices), where prices come from each result's
        'price' metadata entry.
        """
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        documents = results['documents'][0][:]
        prices = [m['price'] for m in results['metadatas'][0][:]]
        return documents, prices

    def get_price(self, s) -> float:
        """
        Extract the first number from a reply string.

        '$' and ',' are removed before searching. Returns 0.0 when no number
        is found, and also when s is None or empty (a chat message's content
        is optional, so None must not crash the caller).
        """
        if not s:
            return 0.0
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        """Return the chat model's price estimate for the description."""
        documents, prices = self.find_similars(description)
        response = self.openai.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        return self.get_price(reply)
|
||||
|
||||
28
week8_wip/agents/messaging_agent.py
Normal file
28
week8_wip/agents/messaging_agent.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
from twilio.rest import Client
|
||||
from agents.deals import Opportunity
|
||||
|
||||
class MessagingAgent:
    """Sends WhatsApp messages about deals through a Twilio client."""

    def __init__(self):
        """
        Read the Twilio settings from environment variables (placeholder
        strings are used when a variable is unset) and create the client.
        """
        sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
        token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
        phone = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
        self.me_from = 'whatsapp:+14155238886'
        self.me_to = f"whatsapp:+1{phone}"
        self.client = Client(sid, token)

    def message(self, text):
        """Send `text` from me_from to me_to."""
        self.client.messages.create(
            to=self.me_to,
            from_=self.me_from,
            body=text,
        )

    def alert(self, opportunity: Opportunity):
        """
        Send a one-line alert: deal price, estimate, the first 10 characters
        of the product description, and the deal URL.
        """
        deal = opportunity.quality_deal
        text = (
            f"Deal! Price=${deal.price:.2f}, "
            f"Estimate=${opportunity.estimate:.2f} :"
            f"{deal.product_description[:10]}... "
            f"{deal.url}"
        )
        self.message(text)
|
||||
|
||||
|
||||
24
week8_wip/agents/planning_agent.py
Normal file
24
week8_wip/agents/planning_agent.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from agents.deals import Deal, QualityDealSelection, Opportunity
|
||||
|
||||
from agents.scanner_agent import ScannerAgent
|
||||
from agents.ensemble_agent import EnsembleAgent
|
||||
from agents.messaging_agent import MessagingAgent
|
||||
|
||||
|
||||
class PlanningAgent:
    """
    Coordinates the other agents: scan for deals, estimate each product's
    price, and send an alert when the best discount is large enough.
    """

    # Minimum discount (estimate minus deal price) that triggers an alert.
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """
        Create the scanner, ensemble and messaging agents.

        collection: handed on to EnsembleAgent.
        """
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()

    def plan(self):
        """
        Run one scan-estimate-alert cycle.

        Up to 5 scanned deals are priced; the resulting opportunities are
        sorted by discount (largest first) and printed. An alert is sent for
        the top one when its discount exceeds DEAL_THRESHOLD. Nothing is
        sent when the scan yields no deals.
        """
        selection = self.scanner.scan()
        # Treat a missing scan result the same as an empty one.
        deals = selection.quality_deals[:5] if selection is not None else []

        opportunities = []
        for deal in deals:
            estimate = self.ensemble.price(deal.product_description)
            # Opportunity is a pydantic model: fields must be passed by
            # keyword, positional arguments raise TypeError.
            opportunities.append(
                Opportunity(
                    quality_deal=deal,
                    estimate=estimate,
                    discount=estimate - deal.price,
                )
            )

        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
        print(opportunities)
        # Guard against an empty list before looking at the best opportunity.
        if opportunities and opportunities[0].discount > self.DEAL_THRESHOLD:
            self.messenger.alert(opportunities[0])
|
||||
18
week8_wip/agents/random_forest_agent.py
Normal file
18
week8_wip/agents/random_forest_agent.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import joblib
|
||||
|
||||
|
||||
class RandomForestAgent:
    """
    Prices a product by embedding its description and feeding the embedding
    to a model loaded from 'random_forest_model.pkl'.
    """

    def __init__(self):
        """Load the sentence-embedding model and the saved regression model."""
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')

    def price(self, description: str) -> float:
        """Return the model's prediction for `description`, floored at 0."""
        embedding = self.vectorizer.encode([description])
        prediction = self.model.predict(embedding)[0]
        # Non-positive predictions are reported as 0.
        return prediction if prediction > 0 else 0
|
||||
46
week8_wip/agents/scanner_agent.py
Normal file
46
week8_wip/agents/scanner_agent.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
import json
|
||||
from openai import OpenAI
|
||||
from agents.deals import Deal, QualityDealSelection
|
||||
|
||||
class ScannerAgent:
    """
    Fetches deals and asks an OpenAI chat model to pick and summarise the
    ones with the most detailed description and clearest price.
    """

    MODEL = "gpt-4o-mini"

    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.

{"quality_deals": [
{
"product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
"price": 99.99,
"url": "the url as provided"
},
...
]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.

Deals:

"""

    def __init__(self):
        """Create the OpenAI client."""
        self.openai = OpenAI()

    def scan(self) -> QualityDealSelection:
        """
        Fetch the current deals and return the model's selection.

        The user prompt is USER_PROMPT_PREFIX followed by every deal's
        describe() text, separated by blank lines. If the completion carries
        no parsed result, an empty selection is returned so callers always
        receive a QualityDealSelection.
        """
        deals = Deal.fetch()
        user_prompt = self.USER_PROMPT_PREFIX + '\n\n'.join(deal.describe() for deal in deals)
        completion = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            response_format=QualityDealSelection
        )
        result = completion.choices[0].message.parsed
        if result is None:
            # No parsed payload on the message: honour the declared return type.
            return QualityDealSelection(quality_deals=[])
        return result
|
||||
10
week8_wip/agents/specialist_agent.py
Normal file
10
week8_wip/agents/specialist_agent.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import modal
|
||||
|
||||
class SpecialistAgent:
    """Prices products through the "Pricer" class of the "pricer-service" Modal app."""

    def __init__(self):
        """Look up the deployed Pricer class and keep an instance of it."""
        pricer_cls = modal.Cls.lookup("pricer-service", "Pricer")
        self.pricer = pricer_cls()

    def price(self, description: str) -> float:
        """Invoke the remote price() call for `description` and return its result."""
        remote_price = self.pricer.price.remote
        return remote_price(description)
|
||||
566
week8_wip/backup_day2.4.ipynb
Normal file
566
week8_wip/backup_day2.4.ipynb
Normal file
@@ -0,0 +1,566 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "fbcdfea8-7241-46d7-a771-c0381a3e7063",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from sentence_transformers import SentenceTransformer\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"import chromadb\n",
|
||||
"from items import Item\n",
|
||||
"from testing import Tester\n",
|
||||
"from agents.pricer_agent import price\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from sklearn.metrics import mean_squared_error, r2_score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e6e88bd1-f89c-4b98-92fa-aa4bc1575bca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# CONSTANTS\n",
|
||||
"\n",
|
||||
"QUESTION = \"How much does this cost to the nearest dollar?\\n\\n\"\n",
|
||||
"DB = \"products_vectorstore\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "98666e73-938e-469d-8987-e6e55ba5e034",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9a25a5cf-8f6c-4b5d-ad98-fdd096f5adf8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "dc696493-0b6f-48aa-9fa8-b1ae0ecaf3cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load in the test pickle file:\n",
|
||||
"\n",
|
||||
"with open('test.pkl', 'rb') as file:\n",
|
||||
" test = pickle.load(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "33d38a06-0c0d-4e96-94d1-35ee183416ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_context(similars, prices):\n",
|
||||
" message = \"To provide some context, here are some other items that might be similar to the item you need to estimate.\\n\\n\"\n",
|
||||
" for similar, price in zip(similars, prices):\n",
|
||||
" message += f\"Potentially related product:\\n{similar}\\nPrice is ${price:.2f}\\n\\n\"\n",
|
||||
" return message"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "61f203b7-63b6-48ed-869b-e393b5bfcad3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def messages_for(item, similars, prices):\n",
|
||||
" system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
|
||||
" user_prompt = make_context(similars, prices)\n",
|
||||
" user_prompt += \"And now the question for you:\\n\\n\"\n",
|
||||
" user_prompt += item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b26f405d-6e1f-4caa-b97f-1f62cd9d1ebc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d26a1104-cd11-4361-ab25-85fb576e0582",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = chromadb.PersistentClient(path=DB)\n",
|
||||
"collection = client.get_or_create_collection('products')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e339760-96d8-4485-bec7-43fadcd30c4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def description(item):\n",
|
||||
" text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
|
||||
" return text.split(\"\\n\\nPrice is $\")[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9f759bd2-7a7e-4c1a-80a0-e12470feca89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e44dbd25-fb95-4b6b-bbbb-8da5fc817105",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def vector(item):\n",
|
||||
" return model.encode([description(item)])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ffd5ee47-db5d-4263-b0d9-80d568c91341",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def find_similars(item):\n",
|
||||
" results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)\n",
|
||||
" documents = results['documents'][0][:]\n",
|
||||
" prices = [m['price'] for m in results['metadatas'][0][:]]\n",
|
||||
" return documents, prices"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d11f1c8d-7480-4d64-a274-b030d701f1b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_price(s):\n",
|
||||
" s = s.replace('$','').replace(',','')\n",
|
||||
" match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
|
||||
" return float(match.group()) if match else 0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a919cf7d-b3d3-4968-8c96-54a0da0b0219",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The function for gpt-4o-mini\n",
|
||||
"\n",
|
||||
"def gpt_4o_mini_rag(item):\n",
|
||||
" documents, prices = find_similars(item)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\", \n",
|
||||
" messages=messages_for(item, documents, prices),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=5\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b918cfc-76c1-442a-8caa-bec500cd504b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpt_4o_mini_rag(test[1000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c92cfc0b-b36d-456f-94cc-fe3f315cc25e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test[1000]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e6d5deb3-6a2a-4484-872c-37176c5e1f07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def proprietary(item):\n",
|
||||
" text = item.prompt.split(\"to the nearest dollar?\\n\\n\")[1].split(\"\\n\\nPrice is $\")[0]\n",
|
||||
" return price(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bacdf607-37b9-4997-adb1-d63abfb645b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(proprietary(test[1]))\n",
|
||||
"print(gpt_4o_mini_rag(test[1]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b35532e7-098a-4ab9-a8f7-8f101b437181",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"truths = []\n",
|
||||
"proprietaries = []\n",
|
||||
"rags = []\n",
|
||||
"for i in tqdm(range(1000,1250)):\n",
|
||||
" item = test[i]\n",
|
||||
" truths.append(item.price)\n",
|
||||
" proprietaries.append(proprietary(item))\n",
|
||||
" rags.append(gpt_4o_mini_rag(item))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e6ae54c7-6e8e-4333-b075-b59978fed560",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mins = [min(p,r) for p,r in zip(proprietaries, rags)]\n",
|
||||
"maxes = [max(p,r) for p,r in zip(proprietaries, rags)]\n",
|
||||
"\n",
|
||||
"X = pd.DataFrame({\n",
|
||||
" 'Proprietary': proprietaries,\n",
|
||||
" 'RAG': rags,\n",
|
||||
" 'Min': mins,\n",
|
||||
" 'Max': maxes,\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"# Convert y to a Series\n",
|
||||
"y = pd.Series(truths)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e68684ed-d029-4d95-bb13-eead19b20e49",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train a Linear Regression\n",
|
||||
"np.random.seed(42)\n",
|
||||
"\n",
|
||||
"lr = LinearRegression()\n",
|
||||
"lr.fit(X, y)\n",
|
||||
"\n",
|
||||
"feature_columns = [\"Proprietary\", \"RAG\", \"Min\", \"Max\"]\n",
|
||||
"\n",
|
||||
"for feature, coef in zip(feature_columns, lr.coef_):\n",
|
||||
" print(f\"{feature}: {coef:.2f}\")\n",
|
||||
"print(f\"Intercept={lr.intercept_:.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "28530362-97b8-42a0-bf89-967539b6f170",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ensemble(item):\n",
|
||||
" prop = proprietary(item)\n",
|
||||
" rag = gpt_4o_mini_rag(item)\n",
|
||||
" Xt = pd.DataFrame({\n",
|
||||
" 'Proprietary': [prop],\n",
|
||||
" 'RAG': [rag],\n",
|
||||
" 'Min': [min(prop,rag)],\n",
|
||||
" 'Max': [max(prop,rag)],\n",
|
||||
" })\n",
|
||||
" yt = lr.predict(Xt)\n",
|
||||
" return yt[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08021c05-340b-4ee2-9d11-4b280766976f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ensemble(test[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8308c74-546f-4fc0-ada4-1974addacfd1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test[0].price"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80792910-c59f-4d96-aa53-683464a8e60c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Tester.test(ensemble, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0c41043-2049-4883-947f-2aad2f6954c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import RandomForestRegressor\n",
|
||||
"\n",
|
||||
"result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
|
||||
"vectors = np.array(result['embeddings'])\n",
|
||||
"documents = result['documents']\n",
|
||||
"prices = [metadata['price'] for metadata in result['metadatas']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e9c3276f-ae01-478d-bb27-dc73b567b41a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=8)\n",
|
||||
"rf_model.fit(vectors, prices)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e8f70cd-4147-40c6-9861-a3513b7e5499",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def new_rf(item):\n",
|
||||
" text = item.prompt.split(\"to the nearest dollar?\\n\\n\")[1].split(\"\\n\\nPrice is $\")[0]\n",
|
||||
" vector = model.encode([text])\n",
|
||||
" return max(0, rf_model.predict(vector)[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a2e3340f-7ed4-47eb-a5a9-dff4c0353f58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"new_rf(test[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f91c903b-8db1-4374-807e-3a8ce282ef30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Tester.test(new_rf, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3c8e23c5-1ed3-4bd1-a3c0-129d4712c93a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"forests = []\n",
|
||||
"for i in tqdm(range(1000,1250)):\n",
|
||||
" item = test[i]\n",
|
||||
" forests.append(new_rf(item))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e2eca63-8230-4904-9a79-7e779747479e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"truths2 = []\n",
|
||||
"proprietaries2 = []\n",
|
||||
"rags2 = []\n",
|
||||
"forests2 = []\n",
|
||||
"for i in tqdm(range(1000,2000)):\n",
|
||||
" item = test[i]\n",
|
||||
" truths2.append(item.price)\n",
|
||||
" proprietaries2.append(proprietary(item))\n",
|
||||
" rags2.append(gpt_4o_mini_rag(item))\n",
|
||||
" forests2.append(new_rf(item))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0a3e057f-05c5-4f8f-8b3b-0afdfccc1412",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mins2 = [min(p,r,f) for p,r,f in zip(proprietaries2, rags2, forests2)]\n",
|
||||
"maxes2 = [max(p,r,f) for p,r,f in zip(proprietaries2, rags2, forests2)]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"X2 = pd.DataFrame({\n",
|
||||
" 'Proprietary': proprietaries2,\n",
|
||||
" 'RAG': rags2,\n",
|
||||
" 'Forest': forests2,\n",
|
||||
" 'Min': mins2,\n",
|
||||
" 'Max': maxes2,\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"# Convert y to a Series\n",
|
||||
"y2 = pd.Series(truths2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1ae62175-b955-428e-b077-705c49ee71bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train a Linear Regression\n",
|
||||
"np.random.seed(42)\n",
|
||||
"\n",
|
||||
"lr2 = LinearRegression()\n",
|
||||
"lr2.fit(X2, y2)\n",
|
||||
"\n",
|
||||
"feature_columns = X2.columns.tolist()\n",
|
||||
"\n",
|
||||
"for feature, coef in zip(feature_columns, lr2.coef_):\n",
|
||||
" print(f\"{feature}: {coef:.2f}\")\n",
|
||||
"print(f\"Intercept={lr2.intercept_:.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "214a3831-c464-4218-a349-534b6bda7f12",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ensemble2(item):\n",
|
||||
" prop = proprietary(item)\n",
|
||||
" rag = gpt_4o_mini_rag(item)\n",
|
||||
" r_f = new_rf(item)\n",
|
||||
" Xt2 = pd.DataFrame({\n",
|
||||
" 'Proprietary': [prop],\n",
|
||||
" 'RAG': [rag],\n",
|
||||
" 'Forest': [r_f],\n",
|
||||
" 'Min': [min(prop,rag, r_f)],\n",
|
||||
" 'Max': [max(prop,rag, r_f)],\n",
|
||||
" })\n",
|
||||
" yt2 = lr2.predict(Xt2)\n",
|
||||
" return yt2[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b234cb68-af68-4475-ae18-8892aac6b74e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Tester.test(ensemble2, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10a7275f-1aa9-4446-9100-a7a0ba0215f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,9 +1,21 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e426cd04-c053-43e8-b505-63cee7956a53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"May need to update environment if cloned git after Sep 26\n",
|
||||
"```\n",
|
||||
"git pull\n",
|
||||
"conda env update -f environment.yml --prune\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "56297249-4a8c-4e67-b8c3-a0d8652c104e",
|
||||
"execution_count": null,
|
||||
"id": "bc0e1c1c-be6a-4395-bbbd-eeafc9330d7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -263,10 +275,41 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "ba9aedca-6a7b-4d30-9f64-59d76f76fb6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.specialist_agent import price"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "fe5843e5-e958-4a65-8326-8f5b4686de7f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"133.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"price(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f5a3181b-1310-4102-8d7d-52caf4c00538",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
|
||||
@@ -210,98 +210,6 @@
|
||||
"CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']\n",
|
||||
"COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4cf1c9a-1ced-48d4-974c-3c850905034e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prework\n",
|
||||
"\n",
|
||||
"vectors_np = np.array(vectors)\n",
|
||||
"colors = [COLORS[CATEGORIES.index(t)] for t in categories]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0c6718b3-e0fd-4319-a1b5-d9d34d6b1dd9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We humans find it easier to visualize things in 2D!\n",
|
||||
"# Reduce the dimensionality of the vectors to 2D using t-SNE\n",
|
||||
"# (t-distributed stochastic neighbor embedding)\n",
|
||||
"\n",
|
||||
"tsne = TSNE(n_components=2, random_state=42)\n",
|
||||
"reduced_vectors = tsne.fit_transform(vectors_np)\n",
|
||||
"\n",
|
||||
"# Create the 2D scatter plot\n",
|
||||
"fig = go.Figure(data=[go.Scatter(\n",
|
||||
" x=reduced_vectors[:, 0],\n",
|
||||
" y=reduced_vectors[:, 1],\n",
|
||||
" mode='markers',\n",
|
||||
" marker=dict(size=3, color=colors, opacity=0.8),\n",
|
||||
" text=[f\"Category: {c}<br>Text: {d[:100]}...\" for c, d in zip(categories, descriptions)],\n",
|
||||
" hoverinfo='text'\n",
|
||||
")])\n",
|
||||
"\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='2D Chroma Vector Store Visualization',\n",
|
||||
" scene=dict(xaxis_title='x',yaxis_title='y'),\n",
|
||||
" width=1200,\n",
|
||||
" height=800,\n",
|
||||
" margin=dict(r=20, b=10, l=10, t=40)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c54df150-c8d8-4bc3-8877-6759691eeb42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try 3D!\n",
|
||||
"\n",
|
||||
"tsne = TSNE(n_components=3, random_state=42)\n",
|
||||
"reduced_vectors = tsne.fit_transform(vectors_np)\n",
|
||||
"\n",
|
||||
"# Create the 3D scatter plot\n",
|
||||
"fig = go.Figure(data=[go.Scatter3d(\n",
|
||||
" x=reduced_vectors[:, 0],\n",
|
||||
" y=reduced_vectors[:, 1],\n",
|
||||
" z=reduced_vectors[:, 2],\n",
|
||||
" mode='markers',\n",
|
||||
" marker=dict(size=3, color=colors, opacity=0.7),\n",
|
||||
" text=[f\"Category: {c}<br>Text: {d[:100]}...\" for c, d in zip(categories, descriptions)],\n",
|
||||
" hoverinfo='text'\n",
|
||||
")])\n",
|
||||
"\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='3D Chroma Vector Store Visualization',\n",
|
||||
" scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),\n",
|
||||
" width=1200,\n",
|
||||
" height=800,\n",
|
||||
" margin=dict(r=20, b=10, l=10, t=40)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e8fb2a63-24c5-4dce-9e63-aa208272f82d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
31093
week8_wip/day2.1.ipynb
Normal file
31093
week8_wip/day2.1.ipynb
Normal file
File diff suppressed because one or more lines are too long
40984
week8_wip/day2.2.ipynb
40984
week8_wip/day2.2.ipynb
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 1,
|
||||
"id": "fbcdfea8-7241-46d7-a771-c0381a3e7063",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -20,7 +20,6 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from collections import Counter\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from sentence_transformers import SentenceTransformer\n",
|
||||
"from datasets import load_dataset\n",
|
||||
@@ -60,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 4,
|
||||
"id": "9a25a5cf-8f6c-4b5d-ad98-fdd096f5adf8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -124,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 5,
|
||||
"id": "d26a1104-cd11-4361-ab25-85fb576e0582",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -587,10 +586,101 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "e6d5deb3-6a2a-4484-872c-37176c5e1f07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.frontier_agent import FrontierAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "56e8dd5d-ed36-49d8-95f7-dc82e548255b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/ed/miniconda3/envs/llms/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent = FrontierAgent(collection)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "980dd126-f675-4499-8817-0cc0bb73e247",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"139.99"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.price(\"Quadcast HyperX condenser mic for high quality podcasting\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "66c18a06-d0f1-4ec9-8aff-ec3ca294dd09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.specialist_agent import SpecialistAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ba672fb4-2c3e-42ee-9ea0-21bfcfc5260c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent2 = SpecialistAgent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "a5a97004-95b4-46ea-b12d-a4ead22fcb2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"189.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent2.price(\"Quadcast HyperX condenser mic for high quality podcasting\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26d5ddc6-baa6-4760-a430-05671847ac47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
|
||||
File diff suppressed because one or more lines are too long
202
week8_wip/day3.ipynb
Normal file
202
week8_wip/day3.ipynb
Normal file
@@ -0,0 +1,202 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d3763a79-8a5a-4300-8de4-93e85475af10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from agents.deals import Deal, QualityDealSelection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c6469e32-16c3-4443-9475-ade710ef6933",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize and constants\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"MODEL = 'gpt-4o-mini'\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "afece9db-8cd4-46be-ac57-0b472e84da7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deals = Deal.fetch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8cd15c4d-eb44-4601-bf0c-f945c1d8e3ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(deals)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4259f30a-6455-49ed-8863-2f9ddd4776cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deals[44].describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8100e5ac-38f5-40c1-a712-08ae12c85038",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.\n",
|
||||
"Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.\n",
|
||||
"Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.\n",
|
||||
"\n",
|
||||
"{\"quality_deals\": [\n",
|
||||
" {\n",
|
||||
" \"product_description\": \"Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.\",\n",
|
||||
" \"price\": 99.99,\n",
|
||||
" \"url\": \"the url as provided\"\n",
|
||||
" },\n",
|
||||
" ...\n",
|
||||
"]}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4bca170-af71-40c9-9597-1d72980c74d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"user_prompt = \"\"\"Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
|
||||
"Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
|
||||
"Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
|
||||
"\n",
|
||||
"Deals:\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"user_prompt += '\\n\\n'.join([deal.describe() for deal in deals])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "020947a6-561b-417b-98a0-a085e31d2ce3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(user_prompt[:2000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7de46f74-868c-4127-8a68-cf2da7d600bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_recommendations():\n",
|
||||
" completion = openai.beta.chat.completions.parse(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" response_format=QualityDealSelection\n",
|
||||
" )\n",
|
||||
" result = completion.choices[0].message.parsed\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c06270d-8c17-4d5a-9cfe-b6cefe788d5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = get_recommendations()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5554a0a-ae40-4684-ad3e-faa3d22e030c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result.quality_deals[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8bdc57fb-7497-47af-a643-6ba5a21cc17e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.scanner_agent import scan"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "132278bc-217a-43a6-b6c4-724140c6a225",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scan()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e1d013a-c930-4dad-901b-41433379e14b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
151
week8_wip/day4.ipynb
Normal file
151
week8_wip/day4.ipynb
Normal file
@@ -0,0 +1,151 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80d683d9-9e92-44ae-af87-a413ca84db21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from twilio.rest import Client\n",
|
||||
"from dotenv import load_dotenv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ba769cc-5301-4810-b01f-cab584cfb3b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv()\n",
|
||||
"os.environ['TWILIO_ACCOUNT_SID'] = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')\n",
|
||||
"os.environ['TWILIO_AUTH_TOKEN'] = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')\n",
|
||||
"os.environ['MY_PHONE_NUMBER'] = os.getenv('MY_PHONE_NUMBER', 'your-phone-if-not-using-env')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "761e6460-d201-4f69-ba31-a641a059e47d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ME_FROM = 'whatsapp:+14155238886'\n",
|
||||
"ME_TO = f\"whatsapp:+1{os.environ['MY_PHONE_NUMBER']}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f77f8b08-6c92-47e2-9dd0-3ddaf01beb07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"account_sid = os.environ['TWILIO_ACCOUNT_SID']\n",
|
||||
"auth_token = os.environ['TWILIO_AUTH_TOKEN']\n",
|
||||
"client = Client(account_sid, auth_token)\n",
|
||||
"\n",
|
||||
"message = client.messages.create(\n",
|
||||
" from_=ME_FROM,\n",
|
||||
" body='hello, me!',\n",
|
||||
" to=ME_TO\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6794a7de-352f-46d2-8451-ff79c9654b31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.messaging_agent import MessagingAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e05cc427-3d2c-4792-ade1-d356f95a82a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = MessagingAgent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ec518f5-dae4-44b1-a185-d7eaf853ec00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.message(\"Hi!!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "57b3a014-0b15-425a-a29b-6fefc5006dee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"DB = \"products_vectorstore\"\n",
|
||||
"client = chromadb.PersistentClient(path=DB)\n",
|
||||
"collection = client.get_or_create_collection('products')\n",
|
||||
"from agents.planning_agent import PlanningAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5c31c39-e357-446e-9cec-b4775c298941",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner = PlanningAgent(collection)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9ac771b-ea12-41c0-a7ce-05f12e27ad9e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner.plan()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70200a3c-64fb-4c34-bdd8-57aaf009ec60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user