Added my contributions to community-contributions
This commit is contained in:
@@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a71ed017-e1b0-4299-88b3-f0eb05adc4df",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Build UI\n",
|
||||
"\n",
|
||||
"We will use more advanced aspects of Gradio - building piece by piece."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "614c6202-4575-448d-98ee-78b735775d2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gradio as gr\n",
|
||||
"from deal_agent_framework import DealAgentFramework\n",
|
||||
"from agents.deals import Opportunity, Deal"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0534e714-5a9c-45c6-998c-3472ac0bb8b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with gr.Blocks(title=\"Deal Intel\", fill_width=True) as ui:\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:24px\">Deal Intel - Deal Hunting Agentic AI</div>')\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:14px\">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"ui.launch(inbrowser=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18c12c10-750c-4da3-8df5-f2bc3393f9e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Updated to change from height to max_height due to change in Gradio v5\n",
|
||||
"# With much thanks to student Ed B. for raising this\n",
|
||||
"\n",
|
||||
"with gr.Blocks(title=\"Deal Intel\", fill_width=True) as ui:\n",
|
||||
"\n",
|
||||
" initial_deal = Deal(product_description=\"Example description\", price=100.0, url=\"https://cnn.com\")\n",
|
||||
" initial_opportunity = Opportunity(deal=initial_deal, estimate=200.0, discount=100.0)\n",
|
||||
" opportunities = gr.State([initial_opportunity])\n",
|
||||
"\n",
|
||||
" def get_table(opps):\n",
|
||||
" return [[opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url] for opp in opps]\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:24px\">\"Deal Intel\" - Deal Hunting Agentic AI</div>')\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:14px\">Deals surfaced so far:</div>')\n",
|
||||
" with gr.Row():\n",
|
||||
" opportunities_dataframe = gr.Dataframe(\n",
|
||||
" headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n",
|
||||
" wrap=True,\n",
|
||||
" column_widths=[4, 1, 1, 1, 2],\n",
|
||||
" row_count=10,\n",
|
||||
" col_count=5,\n",
|
||||
" max_height=400,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ui.load(get_table, inputs=[opportunities], outputs=[opportunities_dataframe])\n",
|
||||
"\n",
|
||||
"ui.launch(inbrowser=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "87106328-a17a-447e-90b9-c547613468da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_framework = DealAgentFramework()\n",
|
||||
"agent_framework.init_agents_as_needed()\n",
|
||||
"\n",
|
||||
"with gr.Blocks(title=\"Deal Intel\", fill_width=True) as ui:\n",
|
||||
"\n",
|
||||
" initial_deal = Deal(product_description=\"Example description\", price=100.0, url=\"https://cnn.com\")\n",
|
||||
" initial_opportunity = Opportunity(deal=initial_deal, estimate=200.0, discount=100.0)\n",
|
||||
" opportunities = gr.State([initial_opportunity])\n",
|
||||
"\n",
|
||||
" def get_table(opps):\n",
|
||||
" return [[opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url] for opp in opps]\n",
|
||||
"\n",
|
||||
" def do_select(opportunities, selected_index: gr.SelectData):\n",
|
||||
" row = selected_index.index[0]\n",
|
||||
" opportunity = opportunities[row]\n",
|
||||
" agent_framework.planner.messenger.alert(opportunity)\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:24px\">\"Deal Intel\" - Deal Hunting Agentic AI</div>')\n",
|
||||
" with gr.Row():\n",
|
||||
" gr.Markdown('<div style=\"text-align: center;font-size:14px\">Deals surfaced so far:</div>')\n",
|
||||
" with gr.Row():\n",
|
||||
" opportunities_dataframe = gr.Dataframe(\n",
|
||||
" headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n",
|
||||
" wrap=True,\n",
|
||||
" column_widths=[4, 1, 1, 1, 2],\n",
|
||||
" row_count=10,\n",
|
||||
" col_count=5,\n",
|
||||
" max_height=400,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ui.load(get_table, inputs=[opportunities], outputs=[opportunities_dataframe])\n",
|
||||
" opportunities_dataframe.select(do_select, inputs=[opportunities], outputs=[])\n",
|
||||
"\n",
|
||||
"ui.launch(inbrowser=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "48506465-1c7a-433f-a665-b277a8b4665c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python price_is_right_final.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f9dd0a27-7d46-4c9e-bbe4-a61c9c899c99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d1504cb8-7bf7-4dc4-9b1a-eaba79404aac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ed84afd-4a04-43d6-8a3b-5143deaf96b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,119 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23f53670-1a73-46ba-a754-4a497e8e0e64",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Messaging Agent and Planning Agent\n",
|
||||
"\n",
|
||||
"Then we'll put it all together into an Agent Framework."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "80d683d9-9e92-44ae-af87-a413ca84db21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from agents.messaging_agent import MessagingAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ba769cc-5301-4810-b01f-cab584cfb3b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"DB = \"products_vectorstore\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e05cc427-3d2c-4792-ade1-d356f95a82a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = MessagingAgent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ec518f5-dae4-44b1-a185-d7eaf853ec00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.push(\"MASSIVE NEWS!!!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "57b3a014-0b15-425a-a29b-6fefc5006dee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"DB = \"products_vectorstore\"\n",
|
||||
"client = chromadb.PersistentClient(path=DB)\n",
|
||||
"collection = client.get_or_create_collection('products')\n",
|
||||
"from agents.planning_agent import PlanningAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5c31c39-e357-446e-9cec-b4775c298941",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner = PlanningAgent(collection)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9ac771b-ea12-41c0-a7ce-05f12e27ad9e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner.plan()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d91ac0bb-738e-4be5-9074-d583190b1e2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,342 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fbcdfea8-7241-46d7-a771-c0381a3e7063",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from sentence_transformers import SentenceTransformer\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"import chromadb\n",
|
||||
"from items import Item\n",
|
||||
"from testing import Tester"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98666e73-938e-469d-8987-e6e55ba5e034",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9a25a5cf-8f6c-4b5d-ad98-fdd096f5adf8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc696493-0b6f-48aa-9fa8-b1ae0ecaf3cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load in the test pickle file\n",
|
||||
"with open('test.pkl', 'rb') as file:\n",
|
||||
" test = pickle.load(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "33d38a06-0c0d-4e96-94d1-35ee183416ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_context(similars, prices):\n",
|
||||
" message = \"To provide some context, here are some other items that might be similar to the item you need to estimate.\\n\\n\"\n",
|
||||
" for similar, price in zip(similars, prices):\n",
|
||||
" message += f\"Potentially related product:\\n{similar}\\nPrice is ${price:.2f}\\n\\n\"\n",
|
||||
" return message"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "61f203b7-63b6-48ed-869b-e393b5bfcad3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def messages_for(item, similars, prices):\n",
|
||||
" system_message = \"You estimate prices of items. Reply only with the price, no explanation. Price is always below $1000.\"\n",
|
||||
" user_prompt = make_context(similars, prices)\n",
|
||||
" user_prompt += \"And now the question for you:\\n\\n\"\n",
|
||||
" user_prompt += item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b26f405d-6e1f-4caa-b97f-1f62cd9d1ebc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DB = \"products_vectorstore\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d26a1104-cd11-4361-ab25-85fb576e0582",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = chromadb.PersistentClient(path=DB)\n",
|
||||
"collection = client.get_or_create_collection('products')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e339760-96d8-4485-bec7-43fadcd30c4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def description(item):\n",
|
||||
" text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
|
||||
" return text.split(\"\\n\\nPrice is $\")[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9f759bd2-7a7e-4c1a-80a0-e12470feca89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e44dbd25-fb95-4b6b-bbbb-8da5fc817105",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def vector(item):\n",
|
||||
" return model.encode([description(item)])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ffd5ee47-db5d-4263-b0d9-80d568c91341",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def find_similars(item):\n",
|
||||
" results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)\n",
|
||||
" documents = results['documents'][0][:]\n",
|
||||
" prices = [m['price'] for m in results['metadatas'][0][:]]\n",
|
||||
" return documents, prices"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6f7b9ff9-fd90-4627-bb17-7c2f7bbd21f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(test[1].prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff1b2659-cc6b-47aa-a797-dd1cd3d1d6c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents, prices = find_similars(test[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "24756d4d-edac-41ce-bb80-c3b6f1cea7ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(make_context(documents, prices))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0b81eca2-0b58-4fe8-9dd6-47f13ba5f8ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(messages_for(test[1], documents, prices))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d11f1c8d-7480-4d64-a274-b030d701f1b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_price(s):\n",
|
||||
" s = s.replace('$','').replace(',','')\n",
|
||||
" match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
|
||||
" return float(match.group()) if match else 0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "06743833-c362-47f8-b02a-139be2cd52ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"get_price(\"The price for this is $99.99\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a919cf7d-b3d3-4968-8c96-54a0da0b0219",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The function for gpt-4o-mini\n",
|
||||
"\n",
|
||||
"def gpt_4o_mini_rag(item):\n",
|
||||
" documents, prices = find_similars(item)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\", \n",
|
||||
" messages=messages_for(item, documents, prices),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=5\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b42e1b9-eaa0-4b45-a847-e8932367f596",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The function for gpt-4.1\n",
|
||||
"\n",
|
||||
"# def gpt_4_1_rag(item):\n",
|
||||
"# documents, prices = find_similars(item)\n",
|
||||
"# response = openai.chat.completions.create(\n",
|
||||
"# model=\"gpt-4.1\", \n",
|
||||
"# messages=messages_for(item, documents, prices),\n",
|
||||
"# seed=42,\n",
|
||||
"# max_tokens=5\n",
|
||||
"# )\n",
|
||||
"# reply = response.choices[0].message.content\n",
|
||||
"# return get_price(reply)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e519e26-ff15-4425-90bb-bfbf55deb39b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpt_4o_mini_rag(test[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "082c6a5a-0f2a-4941-a465-ffb3137a2e8d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# gpt_4_1_rag(test[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce78741b-2966-41d2-9831-cbf8f8d176be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test[1].price"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16d90455-ff7d-4f5f-8b8c-8e061263d1c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Tester.test(gpt_4o_mini_rag, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26d5ddc6-baa6-4760-a430-05671847ac47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,235 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0df0d850-49eb-4a0b-a27a-146969db710d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ScanningAgent\n",
|
||||
"\n",
|
||||
"Looks for promising deals by subscribing to RSS feeds."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d3763a79-8a5a-4300-8de4-93e85475af10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from agents.deals import ScrapedDeal, DealSelection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c6469e32-16c3-4443-9475-ade710ef6933",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialize and constants\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"MODEL = 'gpt-4o-mini'\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "afece9db-8cd4-46be-ac57-0b472e84da7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deals = ScrapedDeal.fetch(show_progress=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8cd15c4d-eb44-4601-bf0c-f945c1d8e3ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(deals)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4259f30a-6455-49ed-8863-2f9ddd4776cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deals[44].describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8100e5ac-38f5-40c1-a712-08ae12c85038",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.\n",
|
||||
"Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.\n",
|
||||
"Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.\n",
|
||||
"Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
|
||||
"\n",
|
||||
"{\"deals\": [\n",
|
||||
" {\n",
|
||||
" \"product_description\": \"Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.\",\n",
|
||||
" \"price\": 99.99,\n",
|
||||
" \"url\": \"the url as provided\"\n",
|
||||
" },\n",
|
||||
" ...\n",
|
||||
"]}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4bca170-af71-40c9-9597-1d72980c74d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"user_prompt = \"\"\"Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
|
||||
"Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
|
||||
"Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
|
||||
"Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
|
||||
"\n",
|
||||
"Deals:\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"user_prompt += '\\n\\n'.join([deal.describe() for deal in deals])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "020947a6-561b-417b-98a0-a085e31d2ce3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(user_prompt[:2000])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7de46f74-868c-4127-8a68-cf2da7d600bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_recommendations():\n",
|
||||
" completion = openai.beta.chat.completions.parse(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" response_format=DealSelection\n",
|
||||
" )\n",
|
||||
" result = completion.choices[0].message.parsed\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c06270d-8c17-4d5a-9cfe-b6cefe788d5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = get_recommendations()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "84e62845-3338-441a-8161-c70097af4773",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(result.deals)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5554a0a-ae40-4684-ad3e-faa3d22e030c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result.deals[1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8bdc57fb-7497-47af-a643-6ba5a21cc17e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from agents.scanner_agent import ScannerAgent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "132278bc-217a-43a6-b6c4-724140c6a225",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = ScannerAgent()\n",
|
||||
"result = agent.scan()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e1d013a-c930-4dad-901b-41433379e14b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ee2e837-1f1d-42d4-8bc4-51cccc343006",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "993a2a24-1a58-42be-8034-6d116fb8d786",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from sentence_transformers import SentenceTransformer\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"import chromadb\n",
|
||||
"from items import Item\n",
|
||||
"from sklearn.manifold import TSNE\n",
|
||||
"import plotly.graph_objects as go"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2359ccc0-dbf2-4b1e-9473-e472b32f548b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
|
||||
"DB = \"products_vectorstore\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "645167e6-cf0d-42d2-949f-1089a25a2841",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"\n",
|
||||
"hf_token = os.environ['HF_TOKEN']\n",
|
||||
"login(hf_token, add_to_git_credential=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "688bd995-ec3e-43cd-8179-7fe14b275877",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# With train.pkl in this folder\n",
|
||||
"with open('train.pkl', 'rb') as file:\n",
|
||||
" train = pickle.load(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4aab95e-d719-4476-b6e7-e248120df25a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = chromadb.PersistentClient(path=DB)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f95dafd-ab80-464e-ba8a-dec7a2424780",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check if the collection exists and delete it if it does\n",
|
||||
"collection_name = \"products\"\n",
|
||||
"existing_collection_names = [collection.name for collection in client.list_collections()]\n",
|
||||
"if collection_name in existing_collection_names:\n",
|
||||
" client.delete_collection(collection_name)\n",
|
||||
" print(f\"Deleted existing collection: {collection_name}\")\n",
|
||||
"\n",
|
||||
"collection = client.create_collection(collection_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a87db200-d19d-44bf-acbd-15c45c70f5c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b23a025-4c35-4d3a-96ad-b956cad37b0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pass in a list of texts, get back a numpy array of vectors\n",
|
||||
"vector = model.encode([\"Well hi there\"])[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8adde63f-e732-4f7c-bba9-f8b2a469f14e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38de1bf8-c9b5-45b4-9f4b-86af93b3f80d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def description(item):\n",
|
||||
" text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
|
||||
" return text.split(\"\\n\\nPrice is $\")[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8c1205bd-4692-44ef-8ea4-69f255354537",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"description(train[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8c79e2fe-1f50-4ebf-9a93-34f3088f2996",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in tqdm(range(0, len(train), 1000)):\n",
|
||||
" documents = [description(item) for item in train[i: i+1000]]\n",
|
||||
" vectors = model.encode(documents).astype(float).tolist()\n",
|
||||
" metadatas = [{\"category\": item.category, \"price\": item.price} for item in train[i: i+1000]]\n",
|
||||
" ids = [f\"doc_{j}\" for j in range(i, i+1000)]\n",
|
||||
" collection.add(\n",
|
||||
" ids=ids,\n",
|
||||
" documents=documents,\n",
|
||||
" embeddings=vectors,\n",
|
||||
" metadatas=metadatas\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a9395db-7bc9-47f9-902f-af8d380c9c09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "745f73d9-f1a6-4e9f-96d9-1c38a1dd7559",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bc0e1c1c-be6a-4395-bbbd-eeafc9330d7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import modal\n",
|
||||
"import modal"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d240622-8422-4c99-8464-c04d063e4cb6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !modal setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0050c070-146f-4c26-8045-5ff284761199",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ebf35de4-ef8f-4e5b-8d4e-9a1771bfbe25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['PYTHONIOENCODING'] = 'utf-8'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7f90d857-2f12-4521-bb90-28efd917f7d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!modal deploy pricer_service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1dec70ff-1986-4405-8624-9bbbe0ce1f4a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pricer = modal.Cls.from_name(\"pricer-service\", \"Pricer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17776139-0d9e-4ad0-bcd0-82d3a92ca61f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pricer().price.remote(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "deb6cdf6-bcb0-49fb-8671-bb5eb22f02e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,195 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "993a2a24-1a58-42be-8034-6d116fb8d786",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from sentence_transformers import SentenceTransformer\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"import chromadb\n",
|
||||
"from items import Item\n",
|
||||
"from sklearn.manifold import TSNE\n",
|
||||
"import plotly.graph_objects as go"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1cc1fe53-612f-4228-aa02-8758f4c2098f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MAXIMUM_DATAPOINTS = 30_000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4aab95e-d719-4476-b6e7-e248120df25a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DB = \"products_vectorstore\"\n",
|
||||
"client = chromadb.PersistentClient(path=DB)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f95dafd-ab80-464e-ba8a-dec7a2424780",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"collection = client.get_or_create_collection('products')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "525fc313-8a16-4ac0-8c42-6a6d1ba1c9b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']\n",
|
||||
"COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4cf1c9a-1ced-48d4-974c-3c850905034e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prework\n",
|
||||
"result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=MAXIMUM_DATAPOINTS)\n",
|
||||
"vectors = np.array(result['embeddings'])\n",
|
||||
"documents = result['documents']\n",
|
||||
"categories = [metadata['category'] for metadata in result['metadatas']]\n",
|
||||
"colors = [COLORS[CATEGORIES.index(c)] for c in categories]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c54df150-c8d8-4bc3-8877-6759691eeb42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try a 2D chart\n",
|
||||
"tsne_2d = TSNE(n_components=2, random_state=42, n_jobs=-1)\n",
|
||||
"reduced_vectors_2d = tsne_2d.fit_transform(vectors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c93457ab-d895-4d9c-8e5c-1173e2089cfd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try 3D!\n",
|
||||
"tsne_3d = TSNE(n_components=3, random_state=42, n_jobs=-1)\n",
|
||||
"reduced_vectors_3d = tsne_3d.fit_transform(vectors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e8fb2a63-24c5-4dce-9e63-aa208272f82d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create the 2D scatter plot\n",
|
||||
"fig = go.Figure(data=[go.Scatter(\n",
|
||||
" x=reduced_vectors_2d[:, 0],\n",
|
||||
" y=reduced_vectors_2d[:, 1],\n",
|
||||
" mode='markers',\n",
|
||||
" marker=dict(size=3, color=colors, opacity=0.7),\n",
|
||||
")])\n",
|
||||
"\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='2D Chroma Vectorstore Visualization',\n",
|
||||
" scene=dict(xaxis_title='x', yaxis_title='y'),\n",
|
||||
" width=1200,\n",
|
||||
" height=800,\n",
|
||||
" margin=dict(r=20, b=10, l=10, t=40)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5e4ae088-3d29-45d3-87a2-fea805fe2c65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# Create the 3D scatter plot\n",
|
||||
"fig = go.Figure(data=[go.Scatter3d(\n",
|
||||
" x=reduced_vectors_3d[:, 0],\n",
|
||||
" y=reduced_vectors_3d[:, 1],\n",
|
||||
" z=reduced_vectors_3d[:, 2],\n",
|
||||
" mode='markers',\n",
|
||||
" marker=dict(size=3, color=colors, opacity=0.7),\n",
|
||||
")])\n",
|
||||
"\n",
|
||||
"fig.update_layout(\n",
|
||||
" title='3D Chroma Vector Store Visualization',\n",
|
||||
" scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),\n",
|
||||
" width=1200,\n",
|
||||
" height=800,\n",
|
||||
" margin=dict(r=20, b=10, l=10, t=40)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0a12d1e8-7da8-401d-8c8d-ba0098096ded",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
import logging
|
||||
|
||||
class Agent:
    """
    An abstract superclass for Agents
    Used to log messages in a way that can identify each Agent
    Subclasses set `name` and `color` so their log lines are distinguishable.
    """

    # Foreground colors (ANSI terminal escape codes)
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # Background color
    BG_BLACK = '\033[40m'

    # Reset code to return to default color
    RESET = '\033[0m'

    # Overridden by subclasses to identify themselves in log output
    name: str = ""
    color: str = '\033[37m'

    def log(self, message):
        """
        Log this as an info message, identifying the agent
        """
        # Wrap the message in this agent's color on a black background,
        # prefixed with the agent's name, then reset the terminal color
        color_code = self.BG_BLACK + self.color
        message = f"[{self.name}] {message}"
        logging.info(color_code + message + self.RESET)
|
||||
@@ -0,0 +1,109 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Self
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import feedparser
|
||||
from tqdm import tqdm
|
||||
import requests
|
||||
import time
|
||||
|
||||
# RSS feeds to scrape for deals (consumed by ScrapedDeal.fetch below)
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]
|
||||
|
||||
def extract(html_snippet: str) -> str:
    """
    Use Beautiful Soup to clean up this HTML snippet and extract useful text
    Falls back to the raw snippet if no 'snippet summary' div is present.
    """
    parsed = BeautifulSoup(html_snippet, 'html.parser')
    summary_div = parsed.find('div', class_='snippet summary')
    if not summary_div:
        # No summary div found - return the snippet as-is, flattened to one line
        return html_snippet.replace('\n', ' ')
    # Extract the text, decode any nested HTML, and strip leftover markup
    text = summary_div.get_text(strip=True)
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
|
||||
|
||||
class ScrapedDeal:
    """
    A class to represent a Deal retrieved from an RSS feed
    Constructing an instance fetches the deal's own page over HTTP.
    """
    category: str
    title: str
    summary: str
    url: str
    details: str
    features: str

    def __init__(self, entry: Dict[str, str]):
        """
        Populate this instance based on the provided dict
        :param entry: one feedparser entry with 'title', 'summary' and 'links' keys
        """
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        # Fetch the full deal page to scrape details beyond the RSS summary
        stuff = requests.get(self.url).content
        soup = BeautifulSoup(stuff, 'html.parser')
        # NOTE(review): assumes the page always contains a 'content-section' div;
        # .get_text() would raise AttributeError if find() returned None - confirm
        content = soup.find('div', class_='content-section').get_text()
        content = content.replace('\nmore', '').replace('\n', ' ')
        # Split the page text into general details and a Features section, if present
        # NOTE(review): split() unpacking assumes "Features" appears at most once
        if "Features" in content:
            self.details, self.features = content.split("Features")
        else:
            self.details = content
            self.features = ""

    def __repr__(self):
        """
        Return a string to describe this deal
        """
        return f"<{self.title}>"

    def describe(self):
        """
        Return a longer string to describe this deal for use in calling a model
        """
        return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

    @classmethod
    def fetch(cls, show_progress : bool = False) -> List[Self]:
        """
        Retrieve all deals from the selected RSS feeds
        :param show_progress: if True, show a tqdm progress bar over the feeds
        :return: a list of ScrapedDeal instances (up to 10 per feed)
        """
        deals = []
        feed_iter = tqdm(feeds) if show_progress else feeds
        for feed_url in feed_iter:
            feed = feedparser.parse(feed_url)
            # Only take the 10 most recent entries per feed
            for entry in feed.entries[:10]:
                deals.append(cls(entry))
                # Pause between page fetches to be polite to the server
                time.sleep(0.5)
        return deals
|
||||
|
||||
class Deal(BaseModel):
    """
    A class to Represent a Deal with a summary description
    """
    # Summary of the product itself (not the terms of the deal)
    product_description: str
    # The offer price in dollars
    price: float
    # Link to the deal page
    url: str
|
||||
|
||||
class DealSelection(BaseModel):
    """
    A class to Represent a list of Deals
    Used as a Structured Outputs response format when calling OpenAI
    """
    deals: List[Deal]
|
||||
|
||||
class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """
    # The underlying deal
    deal: Deal
    # Our estimated true price
    estimate: float
    # estimate minus the deal's asking price
    discount: float
|
||||
@@ -0,0 +1,52 @@
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import joblib
|
||||
|
||||
from agents.agent import Agent
|
||||
from agents.specialist_agent import SpecialistAgent
|
||||
from agents.frontier_agent import FrontierAgent
|
||||
from agents.random_forest_agent import RandomForestAgent
|
||||
from agents.gradient_boosting_agent import GradientBoostingAgent
|
||||
|
||||
class EnsembleAgent(Agent):
    """
    An Agent that combines the price estimates of four underlying agents
    using a pre-trained Linear Regression model.
    """

    name = "Ensemble Agent"
    color = Agent.YELLOW

    def __init__(self, collection):
        """
        Create an instance of Ensemble, by creating each of the models
        And loading the weights of the Ensemble
        :param collection: the Chroma collection, passed through to the FrontierAgent
        """
        self.log("Initializing Ensemble Agent")
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.gradient_boosting = GradientBoostingAgent()
        # Linear Regression weights saved during ensemble training
        self.model = joblib.load('ensemble_model.pkl')
        self.log("Ensemble Agent is ready")

    def price(self, description: str) -> float:
        """
        Run this ensemble model
        Ask each of the models to price the product
        Then use the Linear Regression model to return the weighted price
        :param description: the description of a product
        :return: an estimate of its price
        """
        self.log("Running Ensemble Agent - collaborating with specialist, frontier and random forest agents")
        specialist = self.specialist.price(description)
        frontier = self.frontier.price(description)
        random_forest = self.random_forest.price(description)
        gradient_boosting = self.gradient_boosting.price(description)
        # Column names and order must match the features the ensemble model was trained with
        X = pd.DataFrame({
            'Specialist': [specialist],
            'Frontier': [frontier],
            'RandomForest': [random_forest],
            # NOTE(review): Min/Max exclude gradient_boosting - confirm this
            # matches the feature definition used at training time
            'GradientBoosting': [gradient_boosting],
            'Min': [min(specialist, frontier, random_forest)],
            'Max': [max(specialist, frontier, random_forest)],
        })
        # Clamp at zero: a price can never be negative
        y = max(0, self.model.predict(X)[0])
        self.log(f"Ensemble Agent complete - returning ${y:.2f}")
        return y
|
||||
@@ -0,0 +1,109 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
import math
|
||||
import json
|
||||
from typing import List, Dict
|
||||
import openai
|
||||
from openai import OpenAI
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from datasets import load_dataset
|
||||
import chromadb
|
||||
from items import Item
|
||||
from testing import Tester
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
class FrontierAgent(Agent):
    """
    An Agent that estimates prices by calling a frontier model (OpenAI),
    with RAG context of 5 similar products retrieved from a Chroma datastore.
    """

    name = "Frontier Agent"
    color = Agent.BLUE

    # The frontier model used for every completion call
    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        Set up this instance by connecting to OpenAI, to the Chroma Datastore,
        And setting up the vector encoding model
        :param collection: a Chroma collection of product documents with price metadata
        """
        self.log("Initializing Frontier Agent")
        # Legacy global key setting; the OpenAI() client below also reads OPENAI_API_KEY itself
        openai.api_key = os.getenv("OPENAI_API_KEY")
        self.client = OpenAI()
        self.log("Frontier Agent is setting up with OpenAI")
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.log("Frontier Agent is ready")

    def make_context(self, similars: List[str], prices: List[float]) -> str:
        """
        Create context that can be inserted into the prompt
        :param similars: similar products to the one being estimated
        :param prices: prices of the similar products
        :return: text to insert in the prompt that provides context
        """
        message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        for similar, price in zip(similars, prices):
            message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        return message

    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
        """
        Create the message list to be included in a call to OpenAI
        With the system and user prompt
        :param description: a description of the product
        :param similars: similar products to this one
        :param prices: prices of similar products
        :return: the list of messages in the format expected by OpenAI
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation. Price is always below $1000."
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        # Seeding the assistant turn with "Price is $" nudges the model to reply with just a number
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"}
        ]

    def find_similars(self, description: str):
        """
        Return a list of items similar to the given one by looking in the Chroma datastore
        :param description: the product description to search with
        :return: a tuple of (documents, prices) for the 5 nearest products
        """
        self.log("Frontier Agent is performing a RAG search of the Chroma datastore to find 5 similar products")
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        documents = results['documents'][0]
        prices = [m['price'] for m in results['metadatas'][0]]
        self.log("Frontier Agent has found similar products")
        return documents, prices

    def get_price(self, s) -> float:
        """
        A utility that plucks a floating point number out of a string
        :param s: text that may contain a price
        :return: the first number found, or 0.0 if there is none
        """
        s = s.replace('$','').replace(',','')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        """
        Make a call to OpenAI to estimate the price of the described product,
        by looking up 5 similar products and including them in the prompt to give context
        :param description: a description of the product
        :return: an estimate of the price
        """
        documents, prices = self.find_similars(description)
        self.log(f"Frontier Agent is about to call {self.MODEL} with context including 5 similar products")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        result = self.get_price(reply)
        self.log(f"Frontier Agent completed - predicting ${result:.2f}")
        return result
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import joblib
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
|
||||
class GradientBoostingAgent(Agent):
    """
    An Agent that prices products with a pre-trained Gradient Boosting model
    over SentenceTransformer embeddings.
    """

    name = "Gradient Boosting Agent"
    color = Agent.MAGENTA

    def __init__(self):
        """
        Initialize this object by loading in the saved model weights
        and the SentenceTransformer vector encoding model
        """
        self.log("Gradient Boosting Agent is initializing")
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('gradient_boosting_model.pkl')
        self.log("Gradient Boosting Agent is ready")

    def price(self, description: str) -> float:
        """
        Use a Gradient Boosting model to estimate the price of the described item
        :param description: the product to be estimated
        :return: the price as a float
        """
        self.log("Gradient Boosting Agent is starting a prediction")
        vector = self.vectorizer.encode([description])
        # Clamp at zero: a price can never be negative
        result = max(0, self.model.predict(vector)[0])
        self.log(f"Gradient Boosting Agent completed - predicting ${result:.2f}")
        return result
|
||||
@@ -0,0 +1,79 @@
|
||||
import os
|
||||
# from twilio.rest import Client
|
||||
from agents.deals import Opportunity
|
||||
import http.client
|
||||
import urllib
|
||||
from agents.agent import Agent
|
||||
|
||||
# Uncomment the Twilio lines if you wish to use Twilio
|
||||
|
||||
DO_TEXT = False
|
||||
DO_PUSH = True
|
||||
|
||||
class MessagingAgent(Agent):
    """
    An Agent that sends alerts about Opportunities,
    via Pushover push notifications and/or Twilio SMS,
    controlled by the DO_TEXT / DO_PUSH module constants.
    """

    name = "Messaging Agent"
    color = Agent.WHITE

    def __init__(self):
        """
        Set up this object to either do push notifications via Pushover,
        or SMS via Twilio,
        whichever is specified in the constants
        """
        self.log(f"Messaging Agent is initializing")
        if DO_TEXT:
            account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
            auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
            self.me_from = os.getenv('TWILIO_FROM', 'your-phone-number-if-not-using-env')
            self.me_to = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
            # self.client = Client(account_sid, auth_token)
            # NOTE(review): the Client line above is commented out, so self.client
            # is never assigned and message() would raise AttributeError -
            # uncomment the Twilio import and this line to enable SMS
            self.log("Messaging Agent has initialized Twilio")
        if DO_PUSH:
            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-user-if-not-using-env')
            self.log("Messaging Agent has initialized Pushover")

    def message(self, text):
        """
        Send an SMS message using the Twilio API
        Requires the Twilio Client line in __init__ to be uncommented
        :param text: the SMS body to send
        """
        self.log("Messaging Agent is sending a text message")
        message = self.client.messages.create(
            from_=self.me_from,
            body=text,
            to=self.me_to
        )

    def push(self, text):
        """
        Send a Push Notification using the Pushover API
        :param text: the notification body to send
        """
        self.log("Messaging Agent is sending a push notification")
        conn = http.client.HTTPSConnection("api.pushover.net:443")
        conn.request("POST", "/1/messages.json",
            urllib.parse.urlencode({
                "token": self.pushover_token,
                "user": self.pushover_user,
                "message": text,
                "sound": "cashregister"
            }), { "Content-type": "application/x-www-form-urlencoded" })
        conn.getresponse()

    def alert(self, opportunity: Opportunity):
        """
        Make an alert about the specified Opportunity
        Sends via SMS and/or push depending on the DO_TEXT / DO_PUSH flags
        """
        # Build a compact one-line summary: price, estimate, discount, then a
        # 10-character teaser of the description and the deal URL
        text = f"Deal Alert! Price=${opportunity.deal.price:.2f}, "
        text += f"Estimate=${opportunity.estimate:.2f}, "
        text += f"Discount=${opportunity.discount:.2f} :"
        text += opportunity.deal.product_description[:10]+'... '
        text += opportunity.deal.url
        if DO_TEXT:
            self.message(text)
        if DO_PUSH:
            self.push(text)
        self.log("Messaging Agent has completed")
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
from typing import Optional, List
|
||||
from agents.agent import Agent
|
||||
from agents.deals import ScrapedDeal, DealSelection, Deal, Opportunity
|
||||
from agents.scanner_agent import ScannerAgent
|
||||
from agents.ensemble_agent import EnsembleAgent
|
||||
from agents.messaging_agent import MessagingAgent
|
||||
|
||||
|
||||
class PlanningAgent(Agent):
    """
    The coordinating Agent: scans RSS feeds for deals, prices them with the
    ensemble, and alerts on the best deal when its discount clears the threshold.
    """

    name = "Planning Agent"
    color = Agent.GREEN
    # Minimum discount (in dollars) for a deal to be worth alerting on
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """
        Create instances of the 3 Agents that this planner coordinates across
        :param collection: the Chroma collection, passed through to the EnsembleAgent
        """
        self.log("Planning Agent is initializing")
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()
        self.log("Planning Agent is ready")

    def run(self, deal: Deal) -> Opportunity:
        """
        Run the workflow for a particular deal
        :param deal: the deal, summarized from an RSS scrape
        :returns: an opportunity including the discount
        """
        self.log("Planning Agent is pricing up a potential deal")
        estimate = self.ensemble.price(deal.product_description)
        discount = estimate - deal.price
        self.log(f"Planning Agent has processed a deal with discount ${discount:.2f}")
        return Opportunity(deal=deal, estimate=estimate, discount=discount)

    def plan(self, memory: Optional[List[str]] = None) -> Optional[Opportunity]:
        """
        Run the full workflow:
        1. Use the ScannerAgent to find deals from RSS feeds
        2. Use the EnsembleAgent to estimate them
        3. Use the MessagingAgent to send a notification of deals
        :param memory: deals that have been surfaced in the past
            (NOTE(review): annotated as URLs, but ScannerAgent.fetch_deals reads
            opp.deal.url from each entry, so this appears to hold Opportunity objects)
        :return: an Opportunity if one was surfaced, otherwise None
        """
        # Avoid the mutable-default-argument pitfall: never share one default list across calls
        memory = [] if memory is None else memory
        self.log("Planning Agent is kicking off a run")
        selection = self.scanner.scan(memory=memory)
        if not selection:
            return None
        # Price the top 5 candidate deals and rank them by discount, best first
        opportunities = [self.run(deal) for deal in selection.deals[:5]]
        opportunities.sort(key=lambda opp: opp.discount, reverse=True)
        best = opportunities[0]
        self.log(f"Planning Agent has identified the best deal has discount ${best.discount:.2f}")
        # Only alert (and return) when the discount clears the threshold
        qualifies = best.discount > self.DEAL_THRESHOLD
        if qualifies:
            self.messenger.alert(best)
        self.log("Planning Agent has completed a run")
        return best if qualifies else None
|
||||
@@ -0,0 +1,37 @@
|
||||
# imports
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import joblib
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
|
||||
class RandomForestAgent(Agent):
    """
    An Agent that prices products with a pre-trained Random Forest model
    over SentenceTransformer embeddings.
    """

    name = "Random Forest Agent"
    color = Agent.MAGENTA

    def __init__(self):
        """
        Load the SentenceTransformer encoder and the saved Random Forest weights
        """
        self.log("Random Forest Agent is initializing")
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')
        self.log("Random Forest Agent is ready")

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item with the Random Forest model
        :param description: the product to be estimated
        :return: the price as a float
        """
        self.log("Random Forest Agent is starting a prediction")
        # Encode the description, predict, and clamp at zero (no negative prices)
        embedding = self.vectorizer.encode([description])
        estimate = max(0, self.model.predict(embedding)[0])
        self.log(f"Random Forest Agent completed - predicting ${estimate:.2f}")
        return estimate
|
||||
@@ -0,0 +1,94 @@
|
||||
import os
|
||||
import json
|
||||
from typing import Optional, List
|
||||
from openai import OpenAI
|
||||
from agents.deals import ScrapedDeal, DealSelection
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
class ScannerAgent(Agent):
    """
    An Agent that fetches scraped deals from RSS feeds and uses OpenAI
    Structured Outputs to select the 5 most promising ones with clear prices.
    """

    MODEL = "gpt-4o-mini"

    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.

{"deals": [
    {
        "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragraph of text for each item you choose.",
        "price": 99.99,
        "url": "the url as provided"
    },
    ...
]}"""

    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price that is greater than 0.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.

Deals:

"""

    USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON and include exactly 5 deals, no more."

    name = "Scanner Agent"
    color = Agent.CYAN

    def __init__(self):
        """
        Set up this instance by initializing OpenAI
        """
        self.log("Scanner Agent is initializing")
        self.openai = OpenAI()
        self.log("Scanner Agent is ready")

    def fetch_deals(self, memory) -> List[ScrapedDeal]:
        """
        Look up deals published on RSS feeds
        Return any new deals that are not already in the memory provided
        :param memory: previously surfaced opportunities; each entry exposes .deal.url
        """
        self.log("Scanner Agent is about to fetch deals from RSS feed")
        urls = [opp.deal.url for opp in memory]
        scraped = ScrapedDeal.fetch()
        result = [scrape for scrape in scraped if scrape.url not in urls]
        self.log(f"Scanner Agent received {len(result)} deals not already scraped")
        return result

    def make_user_prompt(self, scraped) -> str:
        """
        Create a user prompt for OpenAI based on the scraped deals provided
        :param scraped: the list of ScrapedDeal instances to describe
        """
        user_prompt = self.USER_PROMPT_PREFIX
        user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
        user_prompt += self.USER_PROMPT_SUFFIX
        return user_prompt

    def scan(self, memory: Optional[List] = None) -> Optional[DealSelection]:
        """
        Call OpenAI to provide a high potential list of deals with good descriptions and prices
        Use StructuredOutputs to ensure it conforms to our specifications
        :param memory: opportunities already raised, deduplicated by their deal URL
        :return: a selection of good deals, or None if there aren't any
        """
        # Avoid the mutable-default-argument pitfall: never share one default list across calls
        memory = [] if memory is None else memory
        scraped = self.fetch_deals(memory)
        if not scraped:
            return None
        user_prompt = self.make_user_prompt(scraped)
        self.log("Scanner Agent is calling OpenAI using Structured Output")
        result = self.openai.beta.chat.completions.parse(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            response_format=DealSelection
        )
        selection = result.choices[0].message.parsed
        # Defensive filter: drop any deal whose price failed to parse as positive
        selection.deals = [deal for deal in selection.deals if deal.price > 0]
        self.log(f"Scanner Agent received {len(selection.deals)} selected deals with price>0 from OpenAI")
        return selection
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
import modal
|
||||
from agents.agent import Agent
|
||||
|
||||
|
||||
class SpecialistAgent(Agent):
    """
    An Agent that runs our fine-tuned LLM that's running remotely on Modal
    """

    name = "Specialist Agent"
    color = Agent.RED

    def __init__(self):
        """
        Connect to the remote Modal "pricer-service" deployment and hold on
        to an instance of its Pricer class for later calls.
        """
        self.log("Specialist Agent is initializing - connecting to modal")
        pricer_cls = modal.Cls.from_name("pricer-service", "Pricer")
        self.pricer = pricer_cls()
        self.log("Specialist Agent is ready")

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item via a remote call to the
        fine-tuned model.
        """
        self.log("Specialist Agent is calling remote fine-tuned model")
        estimate = self.pricer.price.remote(description)
        self.log(f"Specialist Agent completed - predicting ${estimate:.2f}")
        return estimate
|
||||
@@ -0,0 +1,99 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Optional
|
||||
from twilio.rest import Client
|
||||
from dotenv import load_dotenv
|
||||
import chromadb
|
||||
from agents.planning_agent import PlanningAgent
|
||||
from agents.deals import Opportunity
|
||||
from sklearn.manifold import TSNE
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Colors for logging (ANSI escape codes used by DealAgentFramework.log)
BG_BLUE = '\033[44m'
WHITE = '\033[37m'
RESET = '\033[0m'

# Colors for plot
# Product categories present in the vector store; index-aligned with COLORS
CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']
|
||||
|
||||
def init_logging():
    """Configure the root logger to stream INFO-level records to stdout."""
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] [Agents] [%(levelname)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S %z",
        )
    )

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    root_logger.addHandler(stream_handler)
|
||||
|
||||
class DealAgentFramework:
    """
    Top-level orchestrator: wires up the Chroma vector store, a JSON-backed
    memory of surfaced opportunities, and a lazily-created PlanningAgent,
    then runs one planning cycle at a time.
    """

    # Path of the persistent Chroma vector store directory
    DB = "products_vectorstore"
    # JSON file where surfaced opportunities persist between runs
    MEMORY_FILENAME = "memory.json"

    def __init__(self):
        """Set up logging and env vars, open the Chroma collection, load memory."""
        init_logging()
        load_dotenv()
        client = chromadb.PersistentClient(path=self.DB)
        self.memory = self.read_memory()
        self.collection = client.get_or_create_collection('products')
        # Planner is expensive to build; created lazily in init_agents_as_needed()
        self.planner = None

    def init_agents_as_needed(self):
        """Construct the PlanningAgent on first use (no-op thereafter)."""
        if not self.planner:
            self.log("Initializing Agent Framework")
            self.planner = PlanningAgent(self.collection)
            self.log("Agent Framework is ready")

    def read_memory(self) -> List[Opportunity]:
        """Load previously surfaced opportunities from disk, or [] if none exist."""
        if os.path.exists(self.MEMORY_FILENAME):
            with open(self.MEMORY_FILENAME, "r") as file:
                data = json.load(file)
            opportunities = [Opportunity(**item) for item in data]
            return opportunities
        return []

    def write_memory(self) -> None:
        """Persist the current opportunities to the JSON memory file."""
        # NOTE(review): .dict() is the pydantic v1 API; v2 renames it model_dump()
        # - confirm which pydantic version agents.deals targets
        data = [opportunity.dict() for opportunity in self.memory]
        with open(self.MEMORY_FILENAME, "w") as file:
            json.dump(data, file, indent=2)

    def log(self, message: str):
        """Log with the framework's blue-background ANSI color prefix."""
        text = BG_BLUE + WHITE + "[Agent Framework] " + message + RESET
        logging.info(text)

    def run(self) -> List[Opportunity]:
        """
        Run one planning cycle: any opportunity returned by the planner is
        appended to memory and persisted. Returns the (possibly updated) memory.
        """
        self.init_agents_as_needed()
        logging.info("Kicking off Planning Agent")
        result = self.planner.plan(memory=self.memory)
        logging.info(f"Planning Agent has completed and returned: {result}")
        if result:
            self.memory.append(result)
            self.write_memory()
        return self.memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        """
        Fetch up to max_datapoints embeddings from Chroma and reduce them to
        3D with t-SNE for visualization.

        :return: (documents, reduced 3D vectors, per-point plot colors)
        """
        client = chromadb.PersistentClient(path=cls.DB)
        collection = client.get_or_create_collection('products')
        result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        vectors = np.array(result['embeddings'])
        documents = result['documents']
        categories = [metadata['category'] for metadata in result['metadatas']]
        # Map each category to its fixed plot color (CATEGORIES/COLORS are index-aligned)
        colors = [COLORS[CATEGORIES.index(c)] for c in categories]
        tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
        reduced_vectors = tsne.fit_transform(vectors)
        return documents, reduced_vectors, colors
||||
|
||||
|
||||
# Run one full agent cycle when executed as a script
if __name__=="__main__":
    DealAgentFramework().run()
|
||||
|
||||
101
week8/community_contributions/Ensemble_with_xgboost/items.py
Normal file
101
week8/community_contributions/Ensemble_with_xgboost/items.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from typing import Optional
|
||||
from transformers import AutoTokenizer
|
||||
import re
|
||||
|
||||
# Tokenizer used to measure and truncate prompts
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"

# Filtering thresholds deciding which datapoints are kept
MIN_TOKENS = 150   # discard items whose cleaned text encodes to fewer tokens
MAX_TOKENS = 160   # truncate kept prompts to this many tokens
MIN_CHARS = 300    # require at least this much raw text before tokenizing
CEILING_CHARS = MAX_TOKENS * 7   # generous character cap applied pre-tokenization
|
||||
|
||||
class Item:
    """
    An Item is a cleaned, curated datapoint of a Product with a Price
    """

    # Shared tokenizer, loaded once at class-definition time (downloads the
    # Llama 3.1 tokenizer on first use)
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    # Boilerplate substrings stripped from the scraped details text
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    # Instance attributes (set in __init__ / parse)
    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False  # becomes True only if the datapoint passes the length filters

    def __init__(self, data, price):
        """Build an Item from a raw scraped dict plus its known price."""
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Clean up the details string by removing common text that doesn't add value
        """
        details = self.details
        for remove in self.REMOVALS:
            details = details.replace(remove, "")
        return details

    def scrub(self, stuff):
        """
        Clean up the provided text by removing unnecessary characters and whitespace
        Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers
        """
        stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        stuff = stuff.replace(" ,", ",").replace(",,,",",").replace(",,",",")
        words = stuff.split(' ')
        select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)]
        return " ".join(select)

    def parse(self, data):
        """
        Parse this datapoint and if it fits within the allowed Token range,
        then set include to True
        """
        # Concatenate description, features and scrubbed details into one text blob
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'
        # Keep only items with enough text to be informative
        if len(contents) > MIN_CHARS:
            contents = contents[:CEILING_CHARS]
            text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
            tokens = self.tokenizer.encode(text, add_special_tokens=False)
            if len(tokens) > MIN_TOKENS:
                # Truncate to a uniform prompt length, then decode back to text
                tokens = tokens[:MAX_TOKENS]
                text = self.tokenizer.decode(tokens)
                self.make_prompt(text)
                self.include = True

    def make_prompt(self, text):
        """
        Set the prompt instance variable to be a prompt appropriate for training
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        # Ground-truth answer appended, rounded to whole dollars
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return a prompt suitable for testing, with the actual price removed
        """
        return self.prompt.split(self.PREFIX)[0] + self.PREFIX

    def __repr__(self):
        """
        Return a String version of this Item
        """
        return f"<{self.title} = ${self.price}>"
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# ANSI foreground color codes
RED = '\033[31m'
GREEN = '\033[32m'
YELLOW = '\033[33m'
BLUE = '\033[34m'
MAGENTA = '\033[35m'
CYAN = '\033[36m'
WHITE = '\033[37m'

# ANSI background color codes
BG_BLACK = '\033[40m'
BG_BLUE = '\033[44m'

# ANSI reset: restores the terminal's default colors
RESET = '\033[0m'

# Maps each background+foreground ANSI pair to the HTML hex color shown in the UI
mapper = {
    BG_BLACK+RED: "#dd0000",
    BG_BLACK+GREEN: "#00dd00",
    BG_BLACK+YELLOW: "#dddd00",
    BG_BLACK+BLUE: "#0000ee",
    BG_BLACK+MAGENTA: "#aa00dd",
    BG_BLACK+CYAN: "#00dddd",
    BG_BLACK+WHITE: "#87CEEB",
    BG_BLUE+WHITE: "#ff7800"
}


def reformat(message):
    """Translate ANSI color codes in *message* into equivalent HTML spans."""
    for ansi_pair, hex_color in mapper.items():
        message = message.replace(ansi_pair, f'<span style="color: {hex_color}">')
    return message.replace(RESET, '</span>')
|
||||
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
import gradio as gr
|
||||
from deal_agent_framework import DealAgentFramework
|
||||
from agents.deals import Opportunity, Deal
|
||||
|
||||
class App:
    """Minimal Gradio UI: shows surfaced deals in a table, refreshed on a timer."""

    def __init__(self):
        # The framework is constructed lazily in start() so the UI appears quickly
        self.agent_framework = None

    def run(self):
        """Build and launch the Gradio Blocks UI (blocks until the server exits)."""
        with gr.Blocks(title="Deal Intel", fill_width=True) as ui:

            def table_for(opps):
                # One display row per opportunity:
                # description, price, estimate, discount, url
                return [[opp.deal.product_description, f"${opp.deal.price:.2f}", f"${opp.estimate:.2f}", f"${opp.discount:.2f}", opp.deal.url] for opp in opps]

            def start():
                # Runs once on UI load: build the framework and show current memory
                self.agent_framework = DealAgentFramework()
                self.agent_framework.init_agents_as_needed()
                opportunities = self.agent_framework.memory
                table = table_for(opportunities)
                return table

            def go():
                # Timer callback: run one full agent cycle, then refresh the table
                self.agent_framework.run()
                new_opportunities = self.agent_framework.memory
                table = table_for(new_opportunities)
                return table

            def do_select(selected_index: gr.SelectData):
                # Clicking a row sends a push notification for that opportunity
                opportunities = self.agent_framework.memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.agent_framework.planner.messenger.alert(opportunity)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px">"Deal Intel" - Deal Hunting Agentic AI</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Deals surfaced so far:</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Description", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[4, 1, 1, 1, 2],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )

            # Populate the table once the page has loaded
            ui.load(start, inputs=[], outputs=[opportunities_dataframe])

            # Re-run the agents every 60 seconds
            timer = gr.Timer(value=60)
            timer.tick(go, inputs=[], outputs=[opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)
|
||||
|
||||
# Launch the UI when run as a script
if __name__=="__main__":
    App().run()
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import gradio as gr
|
||||
from deal_agent_framework import DealAgentFramework
|
||||
from agents.deals import Opportunity, Deal
|
||||
from log_utils import reformat
|
||||
import plotly.graph_objects as go
|
||||
|
||||
|
||||
class QueueHandler(logging.Handler):
    """
    A logging handler that pushes every formatted record onto a queue, so
    another consumer (the UI) can read log lines asynchronously.
    """

    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue

    def emit(self, record):
        # format() applies this handler's formatter (or the default) to the record
        formatted = self.format(record)
        self.log_queue.put(formatted)
|
||||
|
||||
def html_for(log_data):
    """Render the most recent 18 log lines as a scrollable HTML panel."""
    recent_lines = log_data[-18:]
    output = '<br>'.join(recent_lines)
    return f"""
    <div id="scrollContent" style="height: 400px; overflow-y: auto; border: 1px solid #ccc; background-color: #222229; padding: 10px;">
    {output}
    </div>
    """
|
||||
|
||||
def setup_logging(log_queue):
    """Attach a QueueHandler to the root logger so log lines reach the UI queue."""
    queue_handler = QueueHandler(log_queue)
    queue_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S %z",
        )
    )
    root = logging.getLogger()
    root.addHandler(queue_handler)
    root.setLevel(logging.INFO)
|
||||
|
||||
|
||||
class App:
    """
    Full Gradio UI: a deals table, a live-streaming log panel and a 3D plot of
    the vector store, with the agent framework running on a background thread.
    """

    def __init__(self):
        # Framework is created lazily via get_agent_framework()
        self.agent_framework = None

    def get_agent_framework(self):
        """Create the DealAgentFramework on first use and cache it."""
        if not self.agent_framework:
            self.agent_framework = DealAgentFramework()
            self.agent_framework.init_agents_as_needed()
        return self.agent_framework

    def run(self):
        """Build and launch the Gradio Blocks UI (blocks until the server exits)."""
        with gr.Blocks(title="Deal Intel", fill_width=True) as ui:

            # Accumulated (HTML-reformatted) log lines, kept in Gradio session state
            log_data = gr.State([])

            def table_for(opps):
                # One display row per opportunity:
                # description, price, estimate, discount, url
                return [[opp.deal.product_description, f"${opp.deal.price:.2f}", f"${opp.estimate:.2f}", f"${opp.discount:.2f}", opp.deal.url] for opp in opps]

            def update_output(log_data, log_queue, result_queue):
                # Generator that streams log lines (and eventually the final
                # table) to the UI while the worker thread runs
                initial_result = table_for(self.get_agent_framework().memory)
                final_result = None
                while True:
                    try:
                        message = log_queue.get_nowait()
                        log_data.append(reformat(message))
                        yield log_data, html_for(log_data), final_result or initial_result
                    except queue.Empty:
                        try:
                            final_result = result_queue.get_nowait()
                            yield log_data, html_for(log_data), final_result or initial_result
                        except queue.Empty:
                            if final_result is not None:
                                # Worker finished and all logs are drained - stop
                                break
                            time.sleep(0.1)

            def get_initial_plot():
                # Placeholder figure shown while the vector DB loads
                # NOTE(review): defined but never wired up below - confirm whether
                # the plot was meant to start with this placeholder
                fig = go.Figure()
                fig.update_layout(
                    title='Loading vector DB...',
                    height=400,
                )
                return fig

            def get_plot():
                # t-SNE projection of up to 1000 product embeddings from Chroma
                documents, vectors, colors = DealAgentFramework.get_plot_data(max_datapoints=1000)
                # Create the 3D scatter plot
                fig = go.Figure(data=[go.Scatter3d(
                    x=vectors[:, 0],
                    y=vectors[:, 1],
                    z=vectors[:, 2],
                    mode='markers',
                    marker=dict(size=2, color=colors, opacity=0.7),
                )])

                fig.update_layout(
                    scene=dict(xaxis_title='x',
                               yaxis_title='y',
                               zaxis_title='z',
                               aspectmode='manual',
                               aspectratio=dict(x=2.2, y=2.2, z=1),  # Make x-axis twice as long
                               camera=dict(
                                   eye=dict(x=1.6, y=1.6, z=0.8)  # Adjust camera position
                               )),
                    height=400,
                    margin=dict(r=5, b=1, l=5, t=2)
                )

                return fig

            def do_run():
                # Run one full agent cycle and return the refreshed table
                new_opportunities = self.get_agent_framework().run()
                table = table_for(new_opportunities)
                return table

            def run_with_logging(initial_log_data):
                # Run do_run() on a worker thread while streaming its log output
                # to the UI through a queue
                log_queue = queue.Queue()
                result_queue = queue.Queue()
                setup_logging(log_queue)

                def worker():
                    result = do_run()
                    result_queue.put(result)

                thread = threading.Thread(target=worker)
                thread.start()

                for log_data, output, final_result in update_output(initial_log_data, log_queue, result_queue):
                    yield log_data, output, final_result

            def do_select(selected_index: gr.SelectData):
                # Clicking a table row sends a push notification for that deal
                opportunities = self.get_agent_framework().memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.get_agent_framework().planner.messenger.alert(opportunity)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px"><strong>Deal Intel</strong> - Autonomous Agent Framework that hunts for deals</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">A proprietary fine-tuned LLM deployed on Modal and a RAG pipeline with a frontier model collaborate to send push notifications with great online deals.</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Deals found so far", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[6, 1, 1, 1, 3],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )
            with gr.Row():
                with gr.Column(scale=1):
                    logs = gr.HTML()
                with gr.Column(scale=1):
                    # Plot is computed eagerly here, which delays UI startup
                    plot = gr.Plot(value=get_plot(), show_label=False)

            # Kick off the first agent run (with log streaming) on page load
            ui.load(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            # Re-run the agents every 5 minutes
            timer = gr.Timer(value=300, active=True)
            timer.tick(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)
|
||||
|
||||
# Launch the UI when run as a script
if __name__=="__main__":
    App().run()
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
import modal
|
||||
from modal import App, Image
|
||||
|
||||
# Setup

# Modal app: Debian container with the ML stack plus the HF token secret
app = modal.App("pricer")
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft")
secrets = [modal.Secret.from_name("hf-secret")]

# Constants

GPU = "T4"  # GPU type requested from Modal
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"  # training-run timestamp identifying the adapter
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"  # pinned HF commit of the adapter
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
|
||||
|
||||
|
||||
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    """
    Estimate the price in dollars of the described item using the fine-tuned
    Llama model (4-bit quantized base + LoRA adapter) on a Modal GPU.

    :param description: free-text description of the product
    :return: predicted price, or 0 if no number could be parsed from the output
    """
    import os
    import re
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    from peft import PeftModel

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"

    prompt = f"{QUESTION}\n{description}\n{PREFIX}"

    # Quant Config: 4-bit NF4 quantization so the 8B model fits on a T4
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto"
    )

    # Apply the fine-tuned LoRA adapter on top of the quantized base model
    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    set_seed(42)  # deterministic generation
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    result = tokenizer.decode(outputs[0])

    # The decoded text echoes the prompt; split on the PREFIX constant (rather
    # than a duplicated literal) to isolate the model's answer
    contents = result.split(PREFIX)[1]
    contents = contents.replace(',', '')
    match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0
|
||||
@@ -0,0 +1,89 @@
|
||||
import modal
|
||||
from modal import App, Volume, Image
|
||||
|
||||
# Setup - define our infrastructure with code!

# Modal app: Debian container with the ML stack plus the HF token secret
app = modal.App("pricer-service")
image = Image.debian_slim().pip_install("huggingface", "torch", "transformers", "bitsandbytes", "accelerate", "peft")
secrets = [modal.Secret.from_name("hf-secret")]

# Constants

GPU = "T4"  # GPU type requested from Modal
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"  # training-run timestamp identifying the adapter
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"  # pinned HF commit of the adapter
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
# In-image cache paths where model snapshots are baked at build time
MODEL_DIR = "hf-cache/"
BASE_DIR = MODEL_DIR + BASE_MODEL
FINETUNED_DIR = MODEL_DIR + FINETUNED_MODEL

# Prompt fragments used at inference time
QUESTION = "How much does this cost to the nearest dollar?"
PREFIX = "Price is $"
|
||||
|
||||
@app.cls(image=image, secrets=secrets, gpu=GPU, timeout=1800)
class Pricer:
    """
    Modal service class: bakes the model snapshots into the image at build
    time, loads the fine-tuned pricer once per container, and answers
    price() calls remotely.
    """

    @modal.build()
    def download_model_to_folder(self):
        # Runs at image build time: snapshot both models into the image cache
        from huggingface_hub import snapshot_download
        import os
        os.makedirs(MODEL_DIR, exist_ok=True)
        snapshot_download(BASE_MODEL, local_dir=BASE_DIR)
        snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_DIR)

    @modal.enter()
    def setup(self):
        # Runs once per container start: load tokenizer + 4-bit quantized model
        import os
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
        from peft import PeftModel

        # Quant Config: 4-bit NF4 quantization so the 8B model fits on a T4
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )

        # Load model and tokenizer from the in-image snapshots

        self.tokenizer = AutoTokenizer.from_pretrained(BASE_DIR)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"

        self.base_model = AutoModelForCausalLM.from_pretrained(
            BASE_DIR,
            quantization_config=quant_config,
            device_map="auto"
        )

        # Apply the fine-tuned LoRA adapter on top of the quantized base model
        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_DIR, revision=REVISION)

    @modal.method()
    def price(self, description: str) -> float:
        """
        Estimate the price in dollars of the described item; returns 0 if no
        number can be parsed from the model's output.
        """
        import os
        import re
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
        from peft import PeftModel

        set_seed(42)  # deterministic generation
        prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        attention_mask = torch.ones(inputs.shape, device="cuda")
        outputs = self.fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
        result = self.tokenizer.decode(outputs[0])

        # The decoded text echoes the prompt; grab whatever follows "Price is $"
        contents = result.split("Price is $")[1]
        contents = contents.replace(',','')
        match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
        return float(match.group()) if match else 0

    @modal.method()
    def wake_up(self) -> str:
        # Cheap ping used to warm up / keep a container alive
        return "ok"
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# ANSI color codes for per-datapoint console output
GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
RESET = "\033[0m"
# Maps the qualitative accuracy color name to its ANSI code
COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}
|
||||
|
||||
class Tester:
    """
    Evaluate a price-predictor function over a dataset: prints each datapoint's
    guess vs truth in a quality color, then charts the results and reports
    summary metrics (average absolute error, RMSLE, hit rate).
    """

    def __init__(self, predictor, data, title=None, size=250):
        """
        :param predictor: callable mapping a datapoint to a predicted price
        :param data: sequence of datapoints with .price and .title attributes
        :param title: chart title; defaults to a prettified predictor name
        :param size: number of datapoints to evaluate
        """
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Classify prediction quality: green (good) / orange / red (poor)."""
        # The truth > 0 guards prevent ZeroDivisionError on zero-priced items
        if error < 40 or (truth > 0 and error / truth < 0.2):
            return "green"
        elif error < 80 or (truth > 0 and error / truth < 0.4):
            return "orange"
        else:
            return "red"

    def run_datapoint(self, i):
        """Predict datapoint i, record its metrics and print a colored summary."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        log_error = math.log(truth + 1) - math.log(guess + 1)
        sle = log_error ** 2  # squared log error, aggregated into RMSLE later
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Scatter-plot guesses vs ground truth with a y=x reference line."""
        # (dropped an unused max_error local that was computed here)
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Compute summary metrics and display the chart."""
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate the first `size` datapoints, then report."""
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Convenience one-liner: evaluate *function* on *data* with defaults."""
        cls(function, data).run()
|
||||
Reference in New Issue
Block a user