Week_6_Exercise
This commit is contained in:
621
week6/community-contributions/week_6_exercise_revised.py
Normal file
621
week6/community-contributions/week_6_exercise_revised.py
Normal file
@@ -0,0 +1,621 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Week_6_exercise_revised.ipynb
|
||||||
|
|
||||||
|
Automatically generated by Colab.
|
||||||
|
|
||||||
|
Original file is located at
|
||||||
|
https://colab.research.google.com/drive/1GaV053HB8l-Wd3J3o9BcOAjC009Qk_W0
|
||||||
|
"""
|
||||||
|
|
||||||
|
#installations
|
||||||
|
!pip install --upgrade pip
|
||||||
|
!pip install datasets==3.0.1 anthropic transformers accelerate pandas tqdm numpy
|
||||||
|
|
||||||
|
#imports
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from typing import Optional, List, Dict, Any, Tuple
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import anthropic
|
||||||
|
from datasets import load_dataset
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from tqdm import tqdm
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
# TEMPORARY: hard-coded API keys.
# SECURITY NOTE: never commit real keys — replace the placeholders below with
# your own, or better, read them from Colab secrets / the environment.
# (Placeholders kept: 'sk' for Anthropic, 'hf' for Hugging Face.)
os.environ["ANTHROPIC_API_KEY"] = "sk"
os.environ["HF_TOKEN"] = "hf"


# Anthropic client.
# Fix: the original try/except re-raised *any* failure as
# ImportError("Please install anthropic"), which hides the real cause —
# `import anthropic` already succeeded at the top of the file, so the only
# realistic failure here is a missing/invalid ANTHROPIC_API_KEY (KeyError).
try:
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    print("Anthropic client initialized")
except KeyError as e:
    raise RuntimeError("ANTHROPIC_API_KEY is not set in the environment") from e
|
||||||
|
|
||||||
|
# Basic configuration used throughout the notebook.
RANDOM_SEED = 42
# medium test size
# NOTE(review): TEST_SIZE is defined but the split below uses test_size=0.10
# — confirm which was intended.
TEST_SIZE = 50
CLAUDE_MODEL = "claude-opus-4-20250514"
MAX_TOKENS = 300

# Seed both RNGs so sampling/splits are reproducible.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Load the Amazon Appliances metadata (McAuley-Lab Amazon-Reviews-2023 dump).
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_meta_Appliances", split="full")
# Convert to a pandas DataFrame for easier wrangling.
df = dataset.to_pandas()
# Preview the data (notebook cell output).
df.head()
|
||||||
|
|
||||||
|
# Clean the raw "price" column into a numeric "price_clean" column;
# values that cannot be parsed become NaN (errors="coerce").
df["price_clean"] = pd.to_numeric(df["price"], errors="coerce")

# Number of rows in the data.
print("Dataset size:", len(df))

# The features (columns) in the data.
print(df.columns.tolist())

# Dtypes and non-null counts.
print(df.info())

print("Price-related columns found:", [c for c in df.columns if "price" in c])

print("Missing price_clean:", df["price_clean"].isna().sum(), "rows")

# Price distribution visualization (zoomed histogram: prices under $200 only,
# since the long right tail would otherwise flatten the plot).
plt.figure(figsize=(10,5))
df[df["price_clean"] < 200]["price_clean"].hist(bins=50)
plt.title("Price Distribution")
plt.xlabel("Price ($)")
plt.ylabel("Frequency")
plt.show()

# Keep only rows where price_clean is not null.
df_model = df.dropna(subset=["price_clean"]).copy()
|
||||||
|
|
||||||
|
# Combine the listing fields into a single prompt text.
def combine_text(row) -> str:
    """Combine title, features and description into one prompt string.

    The dataset stores ``features`` and ``description`` as lists of strings,
    but some rows carry plain strings instead.  Fix: the original silently
    dropped any non-list value (replacing it with ``""``); fall back to
    ``str(...)`` so the data is kept — matching how the prediction loop later
    in this notebook handles the same columns.  ``None`` still becomes ``""``.
    """
    title = row["title"] or ""
    features = " ".join(row["features"]) if isinstance(row["features"], list) else str(row["features"] or "")
    description = " ".join(row["description"]) if isinstance(row["description"], list) else str(row["description"] or "")
    return f"{title}\n\nFEATURES: {features}\n\nDESCRIPTION: {description}"
|
||||||
|
|
||||||
|
# Build the combined prompt text for every row.
df_model["text"] = df_model.apply(combine_text, axis=1)

# Retain only the columns the model needs.
df_model = df_model[["text", "price_clean"]].reset_index(drop=True)

# Check the model dataset size.
print(len(df_model))
df_model.head(5)

# Splitting the data into training and test sets.
train_df, test_df = train_test_split(
    df_model,
    test_size=0.10,  # 10% test split
    random_state=RANDOM_SEED
)

# Training set size.
len(train_df)

# Test set size.
len(test_df)

# Convert the test set to a list of dicts for easier sampling.
test_records = test_df.to_dict(orient="records")
|
||||||
|
|
||||||
|
# Pricing system prompt (zero-shot).
def build_prompt(item_text: str) -> str:
    """Build a zero-shot pricing prompt for a single product listing.

    Fix: the original asked for the price "in KES" (Kenyan shillings), but
    the Amazon dataset prices — and every other prompt in this notebook —
    are in USD, so the model was being asked for the wrong currency.
    """
    return f"""
You are a pricing analyst. Given a marketplace product listing, estimate the item's correct fair market price in USD.

Return ONLY a number, no currency sign, no explanation.

Product details:
\"\"\"
{item_text}
\"\"\"
"""
|
||||||
|
|
||||||
|
def estimate_price_claude(item_text: str) -> Optional[float]:
    """Ask Claude for a price estimate and parse the first number in the reply.

    Returns None when the API call fails or no numeric token is found.
    """
    try:
        response = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": build_prompt(item_text)}],
        )
        reply_text = response.content[0].text.strip()

        # Drop thousands separators, then take the first integer/decimal token.
        number = re.search(r"\d+(\.\d+)?", reply_text.replace(",", ""))
        if number is None:
            return None
        return float(number.group(0))
    except Exception as e:
        print("Error:", e)
        return None
|
||||||
|
|
||||||
|
# Re-create the Anthropic client.
# NOTE(review): redundant — the client was already initialized above; kept
# so this cell can run standalone in the notebook.
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

# Filter to rows with a usable price and sample 100 of them.
df_usable = df[df["price_clean"].notna()].copy()
sample_df = df_usable.sample(100, random_state=42).reset_index(drop=True)

# Empty prediction list for results to be stored in.
predictions = []
|
||||||
|
|
||||||
|
# Parse a price out of the model's raw reply.
def extract_price(text):
    """Extract the first valid float from Claude's reply."""
    cleaned = text.replace(",", "")
    found = re.search(r"\d+(\.\d+)?", cleaned)
    return None if found is None else float(found.group(0))
|
||||||
|
|
||||||
|
# Getting the predictions: one Claude call per sampled product.
for i, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    title = row["title"]
    # Normalize list-valued columns to a single space-joined string;
    # non-list values are stringified as-is.
    desc = " ".join(row["description"]) if isinstance(row["description"], list) else str(row["description"])
    feat = " ".join(row["features"]) if isinstance(row["features"], list) else str(row["features"])
    cats = " ".join(row["categories"]) if isinstance(row["categories"], list) else str(row["categories"])

    prompt = f"""
You are estimating the USD retail price of an appliance part.

Analyze the information and respond with **only a single number** (no currency symbol, no text, no explanation).

TITLE: {title}
DESCRIPTION: {desc}
FEATURES: {feat}
CATEGORIES: {cats}

Your response must be only a number like: 29.99
"""

    # max_tokens=50 is enough for a bare number reply.
    response = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=50,
        messages=[{"role": "user", "content": prompt}]
    )

    raw = response.content[0].text.strip()
    pred_price = extract_price(raw)

    # Keep the raw reply alongside the parsed prediction for debugging.
    predictions.append({
        "title": title,
        "true_price": row["price_clean"],
        "claude_price": pred_price,
        "raw_reply": raw
    })

# Saving output in a csv now.
result_df = pd.DataFrame(predictions)
result_df.to_csv("claude_price_predictions_100.csv", index=False)

# Show preview (display is an IPython/Colab builtin).
display(result_df.head())
|
||||||
|
|
||||||
|
# Error metrics over the rows where Claude returned a parseable number.
valid = result_df[result_df["claude_price"].notna()]
# Mean absolute error.
mae = np.mean(np.abs(valid["true_price"] - valid["claude_price"]))
# Root mean squared error (penalizes large misses more than MAE).
rmse = np.sqrt(np.mean((valid["true_price"] - valid["claude_price"])**2))
# Share of predictions within $20 of the true price.
pct_within_20 = np.mean(np.abs(valid["true_price"] - valid["claude_price"]) <= 20) * 100

print(f"\nValid predictions: {len(valid)}/{len(result_df)}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"% within $20: {pct_within_20:.1f}%")
|
||||||
|
|
||||||
|
"""The model returned a price every single time:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
1. MAE = 22.52 — on average Claude is off by $22.52 from the true price.

2. RMSE = 44.11 — big errors exist on some items, a sign of occasional wild guesses.

3. 72% within $20 — Claude predicts with reasonable accuracy on most products, but 28% are far off.

Strengths — the model is fairly decent with zero fine-tuning. It understood the task; 72% within $20 on a dataset it has never seen is a good baseline.

Weaknesses — too many rounded "classic" retail numbers (24.99, 89.99, 14.99, 29.99). It seems not to deeply use features, category, or rating. The RMSE is also high, meaning a few really bad errors are dragging performance.
|
||||||
|
|
||||||
|
Improvements
|
||||||
|
|
||||||
|
1. Prompt enhancements
|
||||||
|
2. Multi-shot and also better structuring
|
||||||
|
3. Fine-tuning with local model
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Persona prompt builder.
def build_pricing_prompt(examples: list, new_title: str) -> str:
    """
    Build a multi-shot prompt for the E-commerce Market Analyst persona.
    Each example has (title, price).
    """
    shot_lines = []
    for shot_title, shot_price in examples:
        shot_lines.append(f"Product: {shot_title}\nEstimated fair market price: ${shot_price:.2f}")
    few_shots = "\n".join(shot_lines)

    system_prompt = (
        "You are a meticulous Data-Driven Market Analyst who estimates realistic, data-based "
        "product prices for online marketplaces. You base estimates on comparable items and "
        "avoid outliers. Return only the price number."
    )

    return (
        f"{system_prompt}\n\nHere are recent examples:\n{few_shots}\n\n"
        f"Now estimate a fair market price for this product:\n"
        f"Product: {new_title}\n\n"
        "Respond with only a number, no text or symbols."
    )
|
||||||
|
|
||||||
|
# 10-item run: sample 10 products, use 3 of them as few-shot examples.
subset_10 = df.dropna(subset=["price_clean"]).sample(10, random_state=42).reset_index(drop=True)
# NOTE(review): the few-shot examples are drawn from the same subset being
# evaluated, so some test items appear in their own prompt (mild leakage).
few_shots_3 = subset_10.sample(3, random_state=42)[["title", "price_clean"]].values.tolist()
results_10 = []

for i, row in tqdm(subset_10.iterrows(), total=len(subset_10)):
    prompt = build_pricing_prompt(few_shots_3, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = resp.content[0].text.strip()
        # Strict parse: any reply that is not a bare number becomes NaN.
        pred = float(reply.replace("$", "").strip())
    except Exception:
        pred, reply = np.nan, None
    results_10.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})

# Keep only rows where a numeric prediction was parsed.
df10 = pd.DataFrame(results_10).dropna(subset=["pred_price"])

mae10 = np.mean(np.abs(df10.pred_price - df10.true_price))
rmse10 = np.sqrt(np.mean((df10.pred_price - df10.true_price)**2))
pct20_10 = np.mean(np.abs(df10.pred_price - df10.true_price) <= 20) * 100

print(f"MAE={mae10:.2f}, RMSE={rmse10:.2f}, %within$20={pct20_10:.1f}%")
df10.head()
|
||||||
|
|
||||||
|
# 30-item run: same protocol as above with 5 few-shot examples.
subset_30 = df.dropna(subset=["price_clean"]).sample(30, random_state=42).reset_index(drop=True)
few_shots_5 = subset_30.sample(5, random_state=42)[["title", "price_clean"]].values.tolist()
results_30 = []

for i, row in tqdm(subset_30.iterrows(), total=len(subset_30)):
    prompt = build_pricing_prompt(few_shots_5, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = resp.content[0].text.strip()
        # Strict parse: non-numeric replies become NaN.
        pred = float(reply.replace("$", "").strip())
    except Exception:
        pred, reply = np.nan, None
    results_30.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})

# Metrics over parseable predictions only.
df30 = pd.DataFrame(results_30).dropna(subset=["pred_price"])

mae30 = np.mean(np.abs(df30.pred_price - df30.true_price))
rmse30 = np.sqrt(np.mean((df30.pred_price - df30.true_price)**2))
pct20_30 = np.mean(np.abs(df30.pred_price - df30.true_price) <= 20) * 100

print(f"MAE={mae30:.2f}, RMSE={rmse30:.2f}, %within$20={pct20_30:.1f}%")
df30.head()
|
||||||
|
|
||||||
|
# 50-item run: same protocol with 8 few-shot examples.
subset_50 = df.dropna(subset=["price_clean"]).sample(50, random_state=42).reset_index(drop=True)
few_shots_8 = subset_50.sample(8, random_state=42)[["title", "price_clean"]].values.tolist()
results_50 = []

for i, row in tqdm(subset_50.iterrows(), total=len(subset_50)):
    prompt = build_pricing_prompt(few_shots_8, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = resp.content[0].text.strip()
        # Strict parse: non-numeric replies become NaN.
        pred = float(reply.replace("$", "").strip())
    except Exception:
        pred, reply = np.nan, None
    results_50.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})

# Metrics over parseable predictions only.
df50 = pd.DataFrame(results_50).dropna(subset=["pred_price"])

mae50 = np.mean(np.abs(df50.pred_price - df50.true_price))
rmse50 = np.sqrt(np.mean((df50.pred_price - df50.true_price)**2))
pct20_50 = np.mean(np.abs(df50.pred_price - df50.true_price) <= 20) * 100

print(f"MAE={mae50:.2f}, RMSE={rmse50:.2f}, %within$20={pct20_50:.1f}%")
df50.head()
|
||||||
|
|
||||||
|
# Improved prompt, used to compare the 10/30/50-shot runs.
def build_strict_prompt(few_shots, test_title):
    """Build a multi-shot pricing prompt with strict number-only output rules."""
    example_lines = [f"Title: {t}\nPrice: ${p:.2f}" for t, p in few_shots]
    shots_text = "\n".join(example_lines)
    return f"""
You are an expert e-commerce product pricing analyst. Your job is to predict the most realistic market price for a product based purely on its title.

Here are reference examples:
{shots_text}

Now predict the price for:
Title: {test_title}

RULES:
- Return ONLY a single number.
- No dollar sign.
- No text, no reasoning, no words.
- Format: 123.45
"""
|
||||||
|
|
||||||
|
def run_eval(name, subset, shot_count):
    """Evaluate the strict prompt over *subset* using *shot_count* examples.

    Returns (results_df, mae, rmse, pct_within_20); rows whose reply could
    not be parsed as a number are dropped from the metrics.
    """
    shots = subset.sample(shot_count, random_state=42)[["title", "price_clean"]].values.tolist()
    rows = []

    for _, item in tqdm(subset.iterrows(), total=len(subset), desc=f"{name}"):
        prompt_text = build_strict_prompt(shots, item["title"])
        try:
            resp = client.messages.create(
                model=CLAUDE_MODEL,
                max_tokens=MAX_TOKENS,
                messages=[{"role": "user", "content": prompt_text}],
            )
            # Strict parse: the prompt demands a bare number.
            pred = float(resp.content[0].text.strip())
        except Exception:
            pred = np.nan

        rows.append({"title": item["title"], "true": item["price_clean"], "pred": pred})

    res_df = pd.DataFrame(rows).dropna(subset=["pred"])
    mae = np.mean(np.abs(res_df.pred - res_df.true))
    rmse = np.sqrt(np.mean((res_df.pred - res_df.true)**2))
    pct20 = np.mean(np.abs(res_df.pred - res_df.true) <= 20) * 100
    return res_df, mae, rmse, pct20
|
||||||
|
|
||||||
|
# Run the strict-prompt evaluation on 10 / 30 / 50 items
# (with 3 / 6 / 8 few-shot examples respectively).
subset10 = df.dropna(subset=["price_clean"]).sample(10, random_state=1).reset_index(drop=True)
subset30 = df.dropna(subset=["price_clean"]).sample(30, random_state=2).reset_index(drop=True)
subset50 = df.dropna(subset=["price_clean"]).sample(50, random_state=3).reset_index(drop=True)

# NOTE(review): these rebind df10/mae10/etc. from the earlier runs.
df10, mae10, rmse10, pct10 = run_eval("RUN10", subset10, 3)
df30, mae30, rmse30, pct30 = run_eval("RUN30", subset30, 6)
df50, mae50, rmse50, pct50 = run_eval("RUN50", subset50, 8)

# Compare the three runs side by side.
comparison = pd.DataFrame([
    {"shots": 10, "MAE": mae10, "RMSE": rmse10, "%≤$20": pct10},
    {"shots": 30, "MAE": mae30, "RMSE": rmse30, "%≤$20": pct30},
    {"shots": 50, "MAE": mae50, "RMSE": rmse50, "%≤$20": pct50},
])

print(comparison)
comparison
|
||||||
|
|
||||||
|
"""The model becomes confused by too many examples, becomes more biased toward random values, and grows less stable and less accurate.
|
||||||
|
Hypothesis: Possibly the dataset has high variance (many unrelated categories), and the model benefits from small, clean, representative few-shots, not large few-shots.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Re-check the variance in the price data.
prices = df["price_clean"].dropna()
print(prices.describe(percentiles=[0.25, 0.5, 0.75, 0.9, 0.95]))

# Skewness > 0 indicates a long right tail; high kurtosis indicates
# heavy tails / outliers.
print("\nSkewness:", prices.skew())
print("Kurtosis:", prices.kurt())

# Plot histogram of the full price distribution.
plt.figure(figsize=(12,4))
sns.histplot(prices, bins=50)
plt.title("Histogram — Full Dataset Price Distribution")
plt.xlabel("Price ($)")
plt.ylabel("Frequency")
plt.show()

# Plot boxplot to visualize spread and outliers.
plt.figure(figsize=(10,2))
sns.boxplot(x=prices)
plt.title("Boxplot — Full Dataset Price Spread")
plt.show()
|
||||||
|
|
||||||
|
"""Testing fewer shots to find the optimal count"""
|
||||||
|
|
||||||
|
def run_few_shot_test(df_subset, shots, model=CLAUDE_MODEL):
    """Evaluate the persona prompt on *df_subset* with *shots* few-shot examples.

    Returns (results_df, mae, rmse, pct_within_20), where rows whose reply
    could not be parsed as a number are dropped before computing metrics.

    Fix: the original used a bare ``except:``, which also swallows
    KeyboardInterrupt/SystemExit — narrowed to ``except Exception``.
    """
    few_shots = df_subset.sample(shots, random_state=42)[["title", "price_clean"]].values.tolist()
    results = []

    for _, row in df_subset.iterrows():
        prompt = build_pricing_prompt(few_shots, row["title"])
        try:
            resp = client.messages.create(
                model=model,
                max_tokens=MAX_TOKENS,
                messages=[{"role": "user", "content": prompt}],
            )
            reply = resp.content[0].text.strip()
            # Strict parse: non-numeric replies fall through to NaN below.
            pred = float(reply.replace("$", "").strip())
        except Exception:
            pred, reply = np.nan, None

        results.append({"title": row["title"], "true": row["price_clean"], "pred": pred})

    # dropna() removes any row containing NaN; pred is the only column that
    # can be NaN here (titles/prices come from non-null sampled rows).
    df_res = pd.DataFrame(results).dropna()
    mae = np.mean(np.abs(df_res.pred - df_res.true))
    rmse = np.sqrt(np.mean((df_res.pred - df_res.true)**2))
    pct20 = np.mean(np.abs(df_res.pred - df_res.true) <= 20) * 100
    return df_res, mae, rmse, pct20
|
||||||
|
|
||||||
|
# 2-shot results on the 50-item subset.
df2, mae2, rmse2, pct2 = run_few_shot_test(subset_50, shots=2)
print("2-SHOT RESULTS → MAE={:.2f}, RMSE={:.2f}, %≤$20={:.1f}%".format(mae2, rmse2, pct2))
df2.head()

# 5-shot results.
df5, mae5, rmse5, pct5 = run_few_shot_test(subset_50, shots=5)
print("5-SHOT RESULTS → MAE={:.2f}, RMSE={:.2f}, %≤$20={:.1f}%".format(mae5, rmse5, pct5))
df5.head()

# 7-shot results.
df7, mae7, rmse7, pct7 = run_few_shot_test(subset_50, shots=7)
print("7-SHOT RESULTS → MAE={:.2f}, RMSE={:.2f}, %≤$20={:.1f}%".format(mae7, rmse7, pct7))
df7.head()
|
||||||
|
|
||||||
|
# Tabulate all the shot counts to choose the optimal configuration.
# NOTE(review): the 0/10/30/50-shot rows are hard-coded numbers from earlier
# runs; re-running the notebook with different samples will NOT update them.
results_summary = [
    {"shots": 0, "MAE": 22.52, "RMSE": 44.11, "%≤$20": 72.0},  # baseline
    {"shots": 2, "MAE": mae2, "RMSE": rmse2, "%≤$20": pct2},
    {"shots": 5, "MAE": mae5, "RMSE": rmse5, "%≤$20": pct5},
    {"shots": 7, "MAE": mae7, "RMSE": rmse7, "%≤$20": pct7},
    {"shots": 10, "MAE": 16.27, "RMSE": 38.59, "%≤$20": 90.0},
    {"shots": 30, "MAE": 135.73, "RMSE": 606.78, "%≤$20": 70.0},
    {"shots": 50, "MAE": 42.54, "RMSE": 136.61, "%≤$20": 72.0},
]

df_comparison = pd.DataFrame(results_summary)
df_comparison = df_comparison.sort_values("shots").reset_index(drop=True)
df_comparison
|
||||||
|
|
||||||
|
"""1. 0-shot baseline: MAE 22.52, %≤$20 72%
|
||||||
|
|
||||||
|
2. Very low few-shots (2, 5): Surprisingly worse than baseline (MAE ↑, %≤$20 ↓), likely due to variance and poor example selection.
|
||||||
|
|
||||||
|
3. 7-shot: Improves over baseline slightly, MAE 19.91, %≤$20 back to 72%
|
||||||
|
|
||||||
|
4. 10-shot: Best performance overall — MAE 16.27, %≤$20 jumps to 90%! Clearly the few-shot hints are helping here.
|
||||||
|
|
||||||
|
5. 30-shot: Performance collapses (MAE 135.73, RMSE 606.78) — too many examples may confuse the model.
|
||||||
|
|
||||||
|
6. 50-shot: Slightly better than 30-shot but still worse than 10-shot.
|
||||||
|
|
||||||
|
|
||||||
|
Conclusion: Optimal few-shot count is 10 for this dataset and prompt style.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Further fine-tuning of the selected 10-shot configuration.
def build_finetune_prompt(few_shots: list, target_title: str, max_chars=800):
    """
    Build an example-based pricing prompt, truncated to max_chars.

    few_shots: list of dicts {"title":..., "price_clean":...}
    target_title: title string
    """
    lines = ["You are an e-commerce pricing expert. Estimate product prices in USD accurately. Output only a number."]
    lines.append("\nExamples:")
    lines.extend(f"- {shot['title']}: {shot['price_clean']}" for shot in few_shots)
    lines.append("\nPredict price for the following product:")
    lines.append(f"Title: {target_title}")
    joined = "\n".join(lines)
    if len(joined) > max_chars:
        return joined[:max_chars] + "..."
    return joined
|
||||||
|
|
||||||
|
# Sample 10-shot prompts for fine-tuning.
finetune_examples = []
# NOTE(review): variable is named subset_10 but holds 100 products and
# rebinds the earlier 10-row subset_10.
subset_10 = df.dropna(subset=["price_clean"]).sample(100, random_state=42).reset_index(drop=True)  # 100 products for initial fine-tuning

for idx, row in subset_10.iterrows():
    # Pick 10 random examples from the subset (excluding the current row)
    # for the few-shot context; seeded by idx so each row is reproducible.
    few_shots = subset_10.drop(idx).sample(10, random_state=idx)[["title","price_clean"]].to_dict(orient="records")
    prompt = build_finetune_prompt(few_shots, row["title"])
    finetune_examples.append({
        "prompt": prompt,
        "completion": str(row["price_clean"])
    })

print("Sample fine-tuning example:")
print(finetune_examples[0])

# Write prompt/completion pairs as JSONL for fine-tuning.
with open("finetune_10shot.jsonl", "w") as f:
    for ex in finetune_examples:
        f.write(json.dumps(ex) + "\n")
print("(10-shot format).finetuned")
|
||||||
|
|
||||||
|
# Evaluate the enhanced 10-shot prompt on the same 100-product sample.
results_finetune_test = []

for idx, row in subset_10.iterrows():
    # Same leave-one-out few-shot selection as in the JSONL build above.
    few_shots = subset_10.drop(idx).sample(10, random_state=idx)[["title","price_clean"]].to_dict(orient="records")
    prompt = build_finetune_prompt(few_shots, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}]
        )
        reply = resp.content[0].text.strip()
        # Strict parse: non-numeric replies become NaN.
        pred = float(reply.replace("$","").strip())
    except Exception:
        pred, reply = np.nan, None
    results_finetune_test.append({"title": row["title"], "true_price": row["price_clean"], "pred": pred, "raw": reply})

# Metrics over parseable predictions only.
df_finetune_test = pd.DataFrame(results_finetune_test).dropna(subset=["pred"])
mae_ft = np.mean(np.abs(df_finetune_test.pred - df_finetune_test.true_price))
rmse_ft = np.sqrt(np.mean((df_finetune_test.pred - df_finetune_test.true_price)**2))
pct20_ft = np.mean(np.abs(df_finetune_test.pred - df_finetune_test.true_price) <= 20) * 100

print(f"Finetuned 10-shot performance: MAE={mae_ft:.2f}, RMSE={rmse_ft:.2f}, %≤$20={pct20_ft:.1f}%")
|
||||||
|
|
||||||
|
"""Multi-shot prompting (10 examples in the prompt) without fine-tuning performed much better.
|
||||||
|
|
||||||
|
|
||||||
|
Next trial: Prompt optimization
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Prompt optimization: an alternative phrasing of the multi-shot prompt.
def build_pricing_prompt_alt(few_shots: list, target_title: str) -> str:
    """
    Build an alternative multi-shot pricing prompt for Claude.

    few_shots: list of dicts with keys 'title' and 'price_clean'
    target_title: product title to predict the price for
    """
    # Instruction with a slightly different phrasing from the persona prompt.
    segments = [
        "Act as an expert e-commerce pricing analyst.",
        "Given product titles and their prices, predict the price in USD for the new product.",
        "Only provide the numeric price. No extra text, explanations, or symbols.",
        "\nExample prices:",
    ]
    # Format the examples as a numbered list.
    segments.extend(
        f"{num}. {shot['title']} — ${shot['price_clean']:.2f}"
        for num, shot in enumerate(few_shots, start=1)
    )
    # Target product.
    segments.append("\nPredict the price for this product:")
    segments.append(f"Title: {target_title}")
    segments.append("Price (USD):")
    # Combine into a single prompt.
    return "\n".join(segments)
|
||||||
|
|
||||||
|
"""eda"""
|
||||||
Reference in New Issue
Block a user