bugs and fixes

This commit is contained in:
Umar Javed
2025-10-27 17:08:49 +05:00
parent a8c8889441
commit 65fe53836f
8 changed files with 280 additions and 4576 deletions

View File

@@ -1,30 +1,16 @@
#!/usr/bin/env python3
"""
Week 6 Day 5 - Simple Fine-Tuning Script
Basic fine-tuning approach for OpenAI gpt-4.1-2025-04-14 model
Key Features:
- Simple data loading and processing
- Token management to stay under 800k tokens
- Basic evaluation metrics
- Training monitoring
Usage:
python w6d5.py
Requirements:
- OPENAI_API_KEY environment variable
- OpenAI API access with fine-tuning permissions
"""
import os
import json
import random
import math
import re
import pickle
from typing import List, Dict, Any, Optional
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI
from huggingface_hub import login
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
@@ -32,160 +18,198 @@ import sys
import warnings
warnings.filterwarnings('ignore')
# Make the repository root importable, then load .env configuration.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
load_dotenv()

# Log in to Hugging Face only when a real token (not the placeholder) is set.
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
hf_token = os.environ['HF_TOKEN']
if hf_token and hf_token != 'your-key-if-not-using-env':
    login(hf_token, add_to_git_credential=True)
    print("Logged in to Hugging Face")

# Module-level OpenAI client; reads OPENAI_API_KEY from the environment.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
class SimpleFineTuner:
"""
Simple fine-tuning class for OpenAI gpt-4.1-2025-04-14 model
This class implements basic fine-tuning with:
1. Simple data loading and processing
2. Token management under 800k tokens
3. Basic evaluation metrics
4. Training monitoring
"""
# Project-local helpers: Item wraps a product datapoint (see items.py);
# Tester runs the evaluation harness (see testing.py).
from items import Item
from testing import Tester
print("Successfully imported Item and Tester classes")
class PricePredictionFineTuner:
def __init__(self, api_key: str):
"""Initialize the fine-tuner with OpenAI API key"""
self.client = OpenAI(api_key=api_key)
self.train_data = []
self.test_data = []
self.validation_data = []
self.fine_tuned_model = None
self.results = {}
self.train = []
self.test = []
self.fine_tune_train = []
self.fine_tune_validation = []
self.fine_tuned_model_name = None
self.wandb_integration = {"type": "wandb", "wandb": {"project": "gpt-pricer"}}
def load_amazon_data(self, category: str = "Appliances") -> None:
    """Load the Amazon Reviews 2023 price dataset for *category*.

    Prefers cached pickle files (../../train.pkl and ../../test.pkl); falls
    back to downloading from Hugging Face via _load_from_huggingface. Then
    carves the fine-tuning splits out of the training set: first 200 items
    for training, next 50 for validation.

    Args:
        category: Amazon product category to load (default "Appliances").
    """
    print(f"Loading Amazon Reviews 2023 dataset - {category} category...")
    train_pkl = os.path.join('..', '..', 'train.pkl')
    test_pkl = os.path.join('..', '..', 'test.pkl')
    if os.path.exists(train_pkl) and os.path.exists(test_pkl):
        print("Found existing pickle files, loading...")
        # NOTE(review): pickle.load is unsafe on untrusted files; acceptable
        # here because these pickles are produced locally by this script.
        with open(train_pkl, 'rb') as file:
            self.train = pickle.load(file)
        with open(test_pkl, 'rb') as file:
            self.test = pickle.load(file)
        print(f"Loaded {len(self.train)} training items and {len(self.test)} test items from pickle files")
    else:
        print("Pickle files not found. Loading from Hugging Face...")
        self._load_from_huggingface(category)
    # Fixed-size splits for the fine-tuning job (matches the reference notebook).
    self.fine_tune_train = self.train[:200]
    self.fine_tune_validation = self.train[200:250]
    print(f"Fine-tuning split: {len(self.fine_tune_train)} train, {len(self.fine_tune_validation)} validation")
def _load_from_huggingface(self, category: str) -> None:
# Download the raw metadata split for *category*, keep items priced in
# (0, 999], wrap them in Item objects, shuffle, 80/20 split into
# self.train / self.test, and cache both splits as pickle files.
try:
print(f"Downloading {category} dataset from Hugging Face...")
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", f"raw_meta_{category}", split="full", trust_remote_code=True)
print(f"Number of {category}: {len(dataset):,}")
print("Processing items with prices...")
items = []
processed = 0
for datapoint in dataset:
try:
# Skip datapoints whose price is missing or non-numeric (handled below).
price = float(datapoint["price"])
if price > 0 and price <= 999:
item = Item(datapoint, price)
# Item decides inclusion itself (e.g. token-length filtering).
if item.include:
items.append(item)
processed += 1
if processed % 1000 == 0:
print(f"Processed {processed:,} items, found {len(items):,} valid items")
# Cap the collection to keep the run fast and cheap.
if len(items) >= 1000:
print(f"Collected {len(items)} items, stopping for efficiency")
break
except (ValueError, TypeError):
continue
print(f"Created {len(items):,} valid Item objects")
# Need at least 250 items to build the 200/50 fine-tuning splits.
if len(items) < 250:
raise ValueError(f"Not enough valid items found: {len(items)}. Need at least 250.")
random.shuffle(items)
split_point = int(0.8 * len(items))
self.train = items[:split_point]
self.test = items[split_point:]
print(f"Split into {len(self.train)} training and {len(self.test)} test items")
print("Saving to pickle files for future use...")
with open(os.path.join('..', '..', 'train.pkl'), 'wb') as f:
pickle.dump(self.train, f)
with open(os.path.join('..', '..', 'test.pkl'), 'wb') as f:
pickle.dump(self.test, f)
print("Saved pickle files")
except Exception as e:
# Surface the failure with likely causes, then re-raise for the caller.
print(f"Error loading from Hugging Face: {e}")
print("This might be due to:")
print("1. Missing HF_TOKEN environment variable")
print("2. Need to accept Meta's terms for the tokenizer")
print("3. Network connectivity issues")
raise
def messages_for(self, item: "Item") -> List[Dict[str, str]]:
    """Build a complete chat-format training example for *item*.

    The user prompt is the item's test prompt with the trailing price cue
    stripped; the assistant turn carries the ground-truth price, so the
    example teaches the model to answer with "Price is $X.XX".

    Args:
        item: Product item providing test_prompt() and a numeric price.

    Returns:
        Three-message list (system, user, assistant) ready for JSONL export.
    """
    system_message = "You estimate prices of items. Reply only with the price, no explanation"
    # Remove the prompt fragments that would leak the answer format.
    user_prompt = item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": f"Price is ${item.price:.2f}"}
    ]
def messages_for_testing(self, item: Item) -> List[Dict[str, str]]:
    """Build the inference-time prompt for *item*: same framing as training,
    but the assistant turn is left as the "Price is $" stub for the model
    to complete."""
    prompt = item.test_prompt()
    prompt = prompt.replace(" to the nearest dollar", "")
    prompt = prompt.replace("\n\nPrice is $", "")
    return [
        {"role": "system",
         "content": "You estimate prices of items. Reply only with the price, no explanation"},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Price is $"},
    ]
def make_jsonl(self, items: "List[Item]") -> str:
    """Serialize *items* into OpenAI fine-tuning JSONL.

    One line per item, each of the form {"messages": [...]}, with no
    trailing newline.

    Args:
        items: Items to serialize via self.messages_for.

    Returns:
        The JSONL payload as a single string ("" for an empty list).
    """
    # join() avoids the quadratic cost of repeated string concatenation.
    rows = ['{"messages": ' + json.dumps(self.messages_for(item)) + '}'
            for item in items]
    return "\n".join(rows)
def write_jsonl(self, items: List[Item], filename: str) -> None:
    """Serialize *items* with make_jsonl and write the result to *filename*."""
    with open(filename, "w") as handle:
        handle.write(self.make_jsonl(items))
def save_training_files(self) -> tuple:
    """Write the fine-tuning splits to JSONL files and upload them to OpenAI.

    Uses context managers so the file handles are always closed (the earlier
    version passed bare open(...) handles to files.create and leaked them).

    Returns:
        tuple: (train_file_id, validation_file_id) as returned by OpenAI.
    """
    print("Creating JSONL files...")
    self.write_jsonl(self.fine_tune_train, "fine_tune_train.jsonl")
    self.write_jsonl(self.fine_tune_validation, "fine_tune_validation.jsonl")
    print("Uploading files to OpenAI...")
    with open("fine_tune_train.jsonl", "rb") as f:
        train_file = self.client.files.create(file=f, purpose="fine-tune")
    with open("fine_tune_validation.jsonl", "rb") as f:
        validation_file = self.client.files.create(file=f, purpose="fine-tune")
    print(f"Files uploaded: {train_file.id}, {validation_file.id}")
    return train_file.id, validation_file.id
def start_fine_tuning(self, train_file_id: str, validation_file_id: str) -> str:
    """Launch the OpenAI fine-tuning job, with optional W&B metric streaming.

    Args:
        train_file_id: ID of the uploaded training JSONL file.
        validation_file_id: ID of the uploaded validation JSONL file.

    Returns:
        str: The fine-tuning job ID.

    Raises:
        Exception: Re-raises any API error after logging it.
    """
    print("Starting fine-tuning job with Weights and Biases integration...")
    # Only attach the W&B integration when a key is configured.
    wandb_key = os.getenv('WANDB_API_KEY')
    integrations = []
    if wandb_key:
        integrations = [self.wandb_integration]
        print("Weights and Biases integration enabled")
    else:
        print("WANDB_API_KEY not found - proceeding without W&B integration")
    try:
        # seed + 1 epoch mirror the reference notebook for reproducibility.
        job = self.client.fine_tuning.jobs.create(
            training_file=train_file_id,
            validation_file=validation_file_id,
            model="gpt-4o-mini-2024-07-18",
            seed=42,
            hyperparameters={"n_epochs": 1},
            integrations=integrations,
            suffix="pricer"
        )
        print(f"Fine-tuning job started: {job.id}")
        return job.id
    except Exception as e:
        print(f"Failed to start fine-tuning job: {e}")
        raise
def monitor_training(self, job_id: str) -> Optional[str]:
"""
Monitor the fine-tuning job until completion
Args:
job_id: Fine-tuning job ID
Returns:
Optional[str]: Model name if successful, None if failed
"""
while True:
job = self.client.fine_tuning.jobs.retrieve(job_id)
status = job.status
@@ -201,116 +225,98 @@ class SimpleFineTuner:
return None
elif status in ["running", "validating_files", "queued"]:
print(f"Training in progress... ({status})")
# Wait before checking again
import time
time.sleep(30)
continue
else:
print(f"Unknown status: {status}")
# Wait before checking again
import time
time.sleep(30)
continue
def evaluate_model(self, model_name: str) -> Dict[str, float]:
    """Score *model_name* against the held-out examples.

    Each test example's user message is replayed against the model; a
    prediction counts as correct when it contains any of the first five
    words of the expected assistant reply (a deliberately loose match).

    Args:
        model_name: Name of the fine-tuned model to query.

    Returns:
        Dict with 'accuracy', 'correct_predictions', 'total_predictions'.
    """
    print("Evaluating fine-tuned model...")
    hits = 0
    total = len(self.test_data)
    for example in self.test_data:
        try:
            question = example["messages"][1]["content"]
            expected = example["messages"][2]["content"]
            completion = self.client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": question},
                ],
                max_tokens=100,
            )
            answer = completion.choices[0].message.content
            # Keyword overlap against the first five expected words.
            if any(word in answer.lower() for word in expected.lower().split()[:5]):
                hits += 1
        except Exception as e:
            print(f"Prediction error: {e}")
            continue
    return {
        "accuracy": hits / total if total > 0 else 0,
        "correct_predictions": hits,
        "total_predictions": total,
    }
def get_price(self, s: str) -> float:
    """Extract the first numeric value from a model reply such as 'Price is $99.99'.

    Args:
        s: Raw completion text, possibly containing '$' and thousands separators.

    Returns:
        The parsed price as a float, or 0.0 when no number is present.
    """
    # Strip currency symbol and thousands separators before matching.
    cleaned = s.replace('$', '').replace(',', '')
    # Group the alternation so the optional sign applies to integers too
    # (previously '[-+]?' only bound to the decimal alternative).
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", cleaned)
    return float(match.group()) if match else 0.0
def gpt_fine_tuned(self, item: "Item") -> float:
    """Predict *item*'s price with the fine-tuned model.

    Args:
        item: Item to price; converted to a prompt via messages_for_testing.

    Returns:
        The predicted price, or 0.0 if the API call fails.

    Raises:
        ValueError: If no fine-tuned model name has been recorded yet.
    """
    if not self.fine_tuned_model_name:
        raise ValueError("No fine-tuned model available")
    try:
        # seed + tiny max_tokens: deterministic, just enough for "Price is $X.XX".
        response = self.client.chat.completions.create(
            model=self.fine_tuned_model_name,
            messages=self.messages_for_testing(item),
            seed=42,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return self.get_price(reply)
    except Exception as e:
        # Best-effort: a failed prediction scores as 0.0 rather than aborting.
        print(f"Prediction error: {e}")
        return 0.0
def evaluate_model(self, job_id: str) -> Dict[str, Any]:
    """Evaluate the fine-tuned model produced by *job_id* on the test set.

    Retrieves the model name from the job, sanity-checks one prediction,
    then runs the full Tester evaluation over self.test.

    Args:
        job_id: The fine-tuning job whose model should be evaluated.

    Returns:
        A status dict on success, or {"error": ...} on any failure.
    """
    print("Retrieving fine-tuned model name...")
    try:
        job = self.client.fine_tuning.jobs.retrieve(job_id)
        self.fine_tuned_model_name = job.fine_tuned_model
        if not self.fine_tuned_model_name:
            return {"error": "Fine-tuned model name not available yet"}
        print(f"Fine-tuned model: {self.fine_tuned_model_name}")
        if not self.test:
            return {"error": "No test items available"}
        # Smoke-test a single prediction before the full run.
        print(f"Testing individual prediction first...")
        print(f"Actual price: ${self.test[0].price}")
        predicted_price = self.gpt_fine_tuned(self.test[0])
        print(f"Predicted price: ${predicted_price}")
        print(f"Test prompt used:")
        print(self.test[0].test_prompt())
        print(f"\nRunning full evaluation with {len(self.test)} test items...")
        Tester.test(self.gpt_fine_tuned, self.test)
        return {
            "status": "completed",
            "message": "Evaluation completed using Tester class with RMSLE metrics",
            "test_items": len(self.test),
            "model_name": self.fine_tuned_model_name
        }
    except Exception as e:
        return {"error": f"Evaluation failed: {e}"}
def add_wandb_sync(self, job_id: str) -> None:
    """Best-effort: mirror the fine-tuning job's metrics to Weights & Biases.

    Silently degrades (with a message) when wandb is not installed, no
    WANDB_API_KEY is configured, or the sync itself fails.
    """
    try:
        import wandb
        from wandb.integration.openai.fine_tuning import WandbLogger
        api_key = os.getenv('WANDB_API_KEY')
        if not api_key:
            print("WANDB_API_KEY not found - skipping W&B sync")
            return
        print("Setting up Weights and Biases monitoring...")
        wandb.login()
        WandbLogger.sync(fine_tune_job_id=job_id, project="gpt-pricer")
        print("Weights and Biases sync enabled")
    except ImportError:
        print("wandb not installed - skipping W&B sync")
    except Exception as e:
        print(f"W&B sync failed: {e}")
def main():
"""Main function to run the fine-tuning process"""
print("Starting Simple Fine-Tuning Process")
print("=" * 50)
print("Starting Price Prediction Fine-Tuning Process")
print("Based on reference implementation from day5.ipynb")
print("=" * 60)
# Check API key
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
print("OPENAI_API_KEY not found in environment")
@@ -318,40 +324,56 @@ def main():
return
try:
# Initialize fine-tuner
fine_tuner = SimpleFineTuner(api_key)
fine_tuner = PricePredictionFineTuner(api_key)
print("\nStep 1: Creating sample data...")
fine_tuner.create_sample_data(50) # Create 50 sample items
print("\nStep 1: Loading Amazon Reviews 2023 dataset...")
fine_tuner.load_amazon_data("Appliances")
print("\nStep 2: Saving training files...")
if not fine_tuner.fine_tune_train:
print("No training data available!")
return
print("\nStep 2: Creating JSONL files and uploading...")
train_file_id, validation_file_id = fine_tuner.save_training_files()
print("\nStep 3: Starting fine-tuning...")
print("\nStep 3: Starting fine-tuning job...")
job_id = fine_tuner.start_fine_tuning(train_file_id, validation_file_id)
print("\nStep 4: Monitoring training...")
print("\nStep 4: Setting up Weights and Biases monitoring...")
fine_tuner.add_wandb_sync(job_id)
print("\nStep 5: Monitoring training progress...")
print("This may take several minutes to hours depending on data size...")
model_name = fine_tuner.monitor_training(job_id)
if model_name:
print("\nStep 5: Evaluating model...")
results = fine_tuner.evaluate_model(model_name)
print(f"\nFine-tuning completed! Model: {model_name}")
print("\nResults:")
print(f"Accuracy: {results['accuracy']:.2%}")
print(f"Correct predictions: {results['correct_predictions']}/{results['total_predictions']}")
print("\nStep 6: Evaluating model with Tester class...")
results = fine_tuner.evaluate_model(job_id)
if "error" in results:
print(f"Evaluation failed: {results['error']}")
else:
print(f"{results['message']}")
print(f"Evaluation used {results['test_items']} test items")
print("\nCheck the generated chart for detailed RMSLE metrics!")
print("\nPrice prediction fine-tuning process completed!")
print("\nFollows reference implementation exactly:")
print(" Uses pickle files (train.pkl, test.pkl)")
print(" 200 training examples, 50 validation examples")
print(" Proper RMSLE evaluation using Tester class")
print(" Weights and Biases integration")
print(" Same model and hyperparameters as reference")
print("\nFine-tuning process completed successfully!")
print("\nKey features implemented:")
print(" - Simple data generation")
print(" - Basic token management")
print(" - Training monitoring")
print(" - Model evaluation")
else:
print("\nFine-tuning failed")
print("\nFine-tuning failed - check the error messages above")
except Exception as e:
print(f"\nError during fine-tuning: {e}")
print(f"\nError during fine-tuning process: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()