bugs and fixes

This commit is contained in:
Umar Javed
2025-10-27 17:08:49 +05:00
parent a8c8889441
commit 65fe53836f
8 changed files with 280 additions and 4576 deletions

View File

@@ -1,30 +1,16 @@
#!/usr/bin/env python3
"""
Week 6 Day 5 - Simple Fine-Tuning Script
Basic fine-tuning approach for OpenAI gpt-4.1-2025-04-14 model
Key Features:
- Simple data loading and processing
- Token management to stay under 800k tokens
- Basic evaluation metrics
- Training monitoring
Usage:
python w6d5.py
Requirements:
- OPENAI_API_KEY environment variable
- OpenAI API access with fine-tuning permissions
"""
import os
import json
import random
import math
import re
import pickle
from typing import List, Dict, Any, Optional
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI
from huggingface_hub import login
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
@@ -32,160 +18,198 @@ import sys
import warnings
warnings.filterwarnings('ignore')
# Make the repository root importable, then load .env configuration.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
load_dotenv()

# Log in to Hugging Face only when a real token (not the placeholder) is set.
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
hf_token = os.environ['HF_TOKEN']
if hf_token and hf_token != 'your-key-if-not-using-env':
    login(hf_token, add_to_git_credential=True)
    print("Logged in to Hugging Face")

# Module-level OpenAI client; reads OPENAI_API_KEY from the environment.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
class SimpleFineTuner:
"""
Simple fine-tuning class for OpenAI gpt-4.1-2025-04-14 model
This class implements basic fine-tuning with:
1. Simple data loading and processing
2. Token management under 800k tokens
3. Basic evaluation metrics
4. Training monitoring
"""
# Project-local helpers: Item wraps a product datapoint (see items.py);
# Tester runs the evaluation harness (see testing.py).
from items import Item
from testing import Tester
print("Successfully imported Item and Tester classes")
class PricePredictionFineTuner:
def __init__(self, api_key: str):
"""Initialize the fine-tuner with OpenAI API key"""
self.client = OpenAI(api_key=api_key)
self.train_data = []
self.test_data = []
self.validation_data = []
self.fine_tuned_model = None
self.results = {}
self.train = []
self.test = []
self.fine_tune_train = []
self.fine_tune_validation = []
self.fine_tuned_model_name = None
self.wandb_integration = {"type": "wandb", "wandb": {"project": "gpt-pricer"}}
def load_amazon_data(self, category: str = "Appliances") -> None:
    """Load the Amazon Reviews 2023 price dataset for *category*.

    Prefers cached pickle files (../../train.pkl and ../../test.pkl); falls
    back to downloading from Hugging Face via _load_from_huggingface. Then
    carves the fine-tuning splits out of the training set: first 200 items
    for training, next 50 for validation.

    Args:
        category: Amazon product category to load (default "Appliances").
    """
    print(f"Loading Amazon Reviews 2023 dataset - {category} category...")
    train_pkl = os.path.join('..', '..', 'train.pkl')
    test_pkl = os.path.join('..', '..', 'test.pkl')
    if os.path.exists(train_pkl) and os.path.exists(test_pkl):
        print("Found existing pickle files, loading...")
        # NOTE(review): pickle.load is unsafe on untrusted files; acceptable
        # here because these pickles are produced locally by this script.
        with open(train_pkl, 'rb') as file:
            self.train = pickle.load(file)
        with open(test_pkl, 'rb') as file:
            self.test = pickle.load(file)
        print(f"Loaded {len(self.train)} training items and {len(self.test)} test items from pickle files")
    else:
        print("Pickle files not found. Loading from Hugging Face...")
        self._load_from_huggingface(category)
    # Fixed-size splits for the fine-tuning job (matches the reference notebook).
    self.fine_tune_train = self.train[:200]
    self.fine_tune_validation = self.train[200:250]
    print(f"Fine-tuning split: {len(self.fine_tune_train)} train, {len(self.fine_tune_validation)} validation")
def _load_from_huggingface(self, category: str) -> None:
# Download the raw metadata split for *category*, keep items priced in
# (0, 999], wrap them in Item objects, shuffle, 80/20 split into
# self.train / self.test, and cache both splits as pickle files.
try:
print(f"Downloading {category} dataset from Hugging Face...")
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", f"raw_meta_{category}", split="full", trust_remote_code=True)
print(f"Number of {category}: {len(dataset):,}")
print("Processing items with prices...")
items = []
processed = 0
for datapoint in dataset:
try:
# Skip datapoints whose price is missing or non-numeric (handled below).
price = float(datapoint["price"])
if price > 0 and price <= 999:
item = Item(datapoint, price)
# Item decides inclusion itself (e.g. token-length filtering).
if item.include:
items.append(item)
processed += 1
if processed % 1000 == 0:
print(f"Processed {processed:,} items, found {len(items):,} valid items")
# Cap the collection to keep the run fast and cheap.
if len(items) >= 1000:
print(f"Collected {len(items)} items, stopping for efficiency")
break
except (ValueError, TypeError):
continue
print(f"Created {len(items):,} valid Item objects")
# Need at least 250 items to build the 200/50 fine-tuning splits.
if len(items) < 250:
raise ValueError(f"Not enough valid items found: {len(items)}. Need at least 250.")
random.shuffle(items)
split_point = int(0.8 * len(items))
self.train = items[:split_point]
self.test = items[split_point:]
print(f"Split into {len(self.train)} training and {len(self.test)} test items")
print("Saving to pickle files for future use...")
with open(os.path.join('..', '..', 'train.pkl'), 'wb') as f:
pickle.dump(self.train, f)
with open(os.path.join('..', '..', 'test.pkl'), 'wb') as f:
pickle.dump(self.test, f)
print("Saved pickle files")
except Exception as e:
# Surface the failure with likely causes, then re-raise for the caller.
print(f"Error loading from Hugging Face: {e}")
print("This might be due to:")
print("1. Missing HF_TOKEN environment variable")
print("2. Need to accept Meta's terms for the tokenizer")
print("3. Network connectivity issues")
raise
def messages_for(self, item: "Item") -> List[Dict[str, str]]:
    """Build a complete chat-format training example for *item*.

    The user prompt is the item's test prompt with the trailing price cue
    stripped; the assistant turn carries the ground-truth price, so the
    example teaches the model to answer with "Price is $X.XX".

    Args:
        item: Product item providing test_prompt() and a numeric price.

    Returns:
        Three-message list (system, user, assistant) ready for JSONL export.
    """
    system_message = "You estimate prices of items. Reply only with the price, no explanation"
    # Remove the prompt fragments that would leak the answer format.
    user_prompt = item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": f"Price is ${item.price:.2f}"}
    ]
def messages_for_testing(self, item: Item) -> List[Dict[str, str]]:
    """Build the inference-time prompt for *item*: same framing as training,
    but the assistant turn is left as the "Price is $" stub for the model
    to complete."""
    prompt = item.test_prompt()
    prompt = prompt.replace(" to the nearest dollar", "")
    prompt = prompt.replace("\n\nPrice is $", "")
    return [
        {"role": "system",
         "content": "You estimate prices of items. Reply only with the price, no explanation"},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Price is $"},
    ]
def make_jsonl(self, items: "List[Item]") -> str:
    """Serialize *items* into OpenAI fine-tuning JSONL.

    One line per item, each of the form {"messages": [...]}, with no
    trailing newline.

    Args:
        items: Items to serialize via self.messages_for.

    Returns:
        The JSONL payload as a single string ("" for an empty list).
    """
    # join() avoids the quadratic cost of repeated string concatenation.
    rows = ['{"messages": ' + json.dumps(self.messages_for(item)) + '}'
            for item in items]
    return "\n".join(rows)
def write_jsonl(self, items: List[Item], filename: str) -> None:
    """Serialize *items* with make_jsonl and write the result to *filename*."""
    with open(filename, "w") as handle:
        handle.write(self.make_jsonl(items))
def save_training_files(self) -> tuple:
    """Write the fine-tuning splits to JSONL files and upload them to OpenAI.

    Uses context managers so the file handles are always closed (the earlier
    version passed bare open(...) handles to files.create and leaked them).

    Returns:
        tuple: (train_file_id, validation_file_id) as returned by OpenAI.
    """
    print("Creating JSONL files...")
    self.write_jsonl(self.fine_tune_train, "fine_tune_train.jsonl")
    self.write_jsonl(self.fine_tune_validation, "fine_tune_validation.jsonl")
    print("Uploading files to OpenAI...")
    with open("fine_tune_train.jsonl", "rb") as f:
        train_file = self.client.files.create(file=f, purpose="fine-tune")
    with open("fine_tune_validation.jsonl", "rb") as f:
        validation_file = self.client.files.create(file=f, purpose="fine-tune")
    print(f"Files uploaded: {train_file.id}, {validation_file.id}")
    return train_file.id, validation_file.id
def start_fine_tuning(self, train_file_id: str, validation_file_id: str) -> str:
    """Launch the OpenAI fine-tuning job, with optional W&B metric streaming.

    Args:
        train_file_id: ID of the uploaded training JSONL file.
        validation_file_id: ID of the uploaded validation JSONL file.

    Returns:
        str: The fine-tuning job ID.

    Raises:
        Exception: Re-raises any API error after logging it.
    """
    print("Starting fine-tuning job with Weights and Biases integration...")
    # Only attach the W&B integration when a key is configured.
    wandb_key = os.getenv('WANDB_API_KEY')
    integrations = []
    if wandb_key:
        integrations = [self.wandb_integration]
        print("Weights and Biases integration enabled")
    else:
        print("WANDB_API_KEY not found - proceeding without W&B integration")
    try:
        # seed + 1 epoch mirror the reference notebook for reproducibility.
        job = self.client.fine_tuning.jobs.create(
            training_file=train_file_id,
            validation_file=validation_file_id,
            model="gpt-4o-mini-2024-07-18",
            seed=42,
            hyperparameters={"n_epochs": 1},
            integrations=integrations,
            suffix="pricer"
        )
        print(f"Fine-tuning job started: {job.id}")
        return job.id
    except Exception as e:
        print(f"Failed to start fine-tuning job: {e}")
        raise
def monitor_training(self, job_id: str) -> Optional[str]:
"""
Monitor the fine-tuning job until completion
Args:
job_id: Fine-tuning job ID
Returns:
Optional[str]: Model name if successful, None if failed
"""
while True:
job = self.client.fine_tuning.jobs.retrieve(job_id)
status = job.status
@@ -201,116 +225,98 @@ class SimpleFineTuner:
return None
elif status in ["running", "validating_files", "queued"]:
print(f"Training in progress... ({status})")
# Wait before checking again
import time
time.sleep(30)
continue
else:
print(f"Unknown status: {status}")
# Wait before checking again
import time
time.sleep(30)
continue
def evaluate_model(self, model_name: str) -> Dict[str, float]:
    """Score *model_name* against the held-out examples.

    Each test example's user message is replayed against the model; a
    prediction counts as correct when it contains any of the first five
    words of the expected assistant reply (a deliberately loose match).

    Args:
        model_name: Name of the fine-tuned model to query.

    Returns:
        Dict with 'accuracy', 'correct_predictions', 'total_predictions'.
    """
    print("Evaluating fine-tuned model...")
    hits = 0
    total = len(self.test_data)
    for example in self.test_data:
        try:
            question = example["messages"][1]["content"]
            expected = example["messages"][2]["content"]
            completion = self.client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": question},
                ],
                max_tokens=100,
            )
            answer = completion.choices[0].message.content
            # Keyword overlap against the first five expected words.
            if any(word in answer.lower() for word in expected.lower().split()[:5]):
                hits += 1
        except Exception as e:
            print(f"Prediction error: {e}")
            continue
    return {
        "accuracy": hits / total if total > 0 else 0,
        "correct_predictions": hits,
        "total_predictions": total,
    }
def get_price(self, s: str) -> float:
    """Extract the first numeric value from a model reply such as 'Price is $99.99'.

    Args:
        s: Raw completion text, possibly containing '$' and thousands separators.

    Returns:
        The parsed price as a float, or 0.0 when no number is present.
    """
    # Strip currency symbol and thousands separators before matching.
    cleaned = s.replace('$', '').replace(',', '')
    # Group the alternation so the optional sign applies to integers too
    # (previously '[-+]?' only bound to the decimal alternative).
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", cleaned)
    return float(match.group()) if match else 0.0
def gpt_fine_tuned(self, item: "Item") -> float:
    """Predict *item*'s price with the fine-tuned model.

    Args:
        item: Item to price; converted to a prompt via messages_for_testing.

    Returns:
        The predicted price, or 0.0 if the API call fails.

    Raises:
        ValueError: If no fine-tuned model name has been recorded yet.
    """
    if not self.fine_tuned_model_name:
        raise ValueError("No fine-tuned model available")
    try:
        # seed + tiny max_tokens: deterministic, just enough for "Price is $X.XX".
        response = self.client.chat.completions.create(
            model=self.fine_tuned_model_name,
            messages=self.messages_for_testing(item),
            seed=42,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return self.get_price(reply)
    except Exception as e:
        # Best-effort: a failed prediction scores as 0.0 rather than aborting.
        print(f"Prediction error: {e}")
        return 0.0
def evaluate_model(self, job_id: str) -> Dict[str, Any]:
    """Evaluate the fine-tuned model produced by *job_id* on the test set.

    Retrieves the model name from the job, sanity-checks one prediction,
    then runs the full Tester evaluation over self.test.

    Args:
        job_id: The fine-tuning job whose model should be evaluated.

    Returns:
        A status dict on success, or {"error": ...} on any failure.
    """
    print("Retrieving fine-tuned model name...")
    try:
        job = self.client.fine_tuning.jobs.retrieve(job_id)
        self.fine_tuned_model_name = job.fine_tuned_model
        if not self.fine_tuned_model_name:
            return {"error": "Fine-tuned model name not available yet"}
        print(f"Fine-tuned model: {self.fine_tuned_model_name}")
        if not self.test:
            return {"error": "No test items available"}
        # Smoke-test a single prediction before the full run.
        print(f"Testing individual prediction first...")
        print(f"Actual price: ${self.test[0].price}")
        predicted_price = self.gpt_fine_tuned(self.test[0])
        print(f"Predicted price: ${predicted_price}")
        print(f"Test prompt used:")
        print(self.test[0].test_prompt())
        print(f"\nRunning full evaluation with {len(self.test)} test items...")
        Tester.test(self.gpt_fine_tuned, self.test)
        return {
            "status": "completed",
            "message": "Evaluation completed using Tester class with RMSLE metrics",
            "test_items": len(self.test),
            "model_name": self.fine_tuned_model_name
        }
    except Exception as e:
        return {"error": f"Evaluation failed: {e}"}
def add_wandb_sync(self, job_id: str) -> None:
    """Best-effort: mirror the fine-tuning job's metrics to Weights & Biases.

    Silently degrades (with a message) when wandb is not installed, no
    WANDB_API_KEY is configured, or the sync itself fails.
    """
    try:
        import wandb
        from wandb.integration.openai.fine_tuning import WandbLogger
        api_key = os.getenv('WANDB_API_KEY')
        if not api_key:
            print("WANDB_API_KEY not found - skipping W&B sync")
            return
        print("Setting up Weights and Biases monitoring...")
        wandb.login()
        WandbLogger.sync(fine_tune_job_id=job_id, project="gpt-pricer")
        print("Weights and Biases sync enabled")
    except ImportError:
        print("wandb not installed - skipping W&B sync")
    except Exception as e:
        print(f"W&B sync failed: {e}")
def main():
"""Main function to run the fine-tuning process"""
print("Starting Simple Fine-Tuning Process")
print("=" * 50)
print("Starting Price Prediction Fine-Tuning Process")
print("Based on reference implementation from day5.ipynb")
print("=" * 60)
# Check API key
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
print("OPENAI_API_KEY not found in environment")
@@ -318,40 +324,56 @@ def main():
return
try:
# Initialize fine-tuner
fine_tuner = SimpleFineTuner(api_key)
fine_tuner = PricePredictionFineTuner(api_key)
print("\nStep 1: Creating sample data...")
fine_tuner.create_sample_data(50) # Create 50 sample items
print("\nStep 1: Loading Amazon Reviews 2023 dataset...")
fine_tuner.load_amazon_data("Appliances")
print("\nStep 2: Saving training files...")
if not fine_tuner.fine_tune_train:
print("No training data available!")
return
print("\nStep 2: Creating JSONL files and uploading...")
train_file_id, validation_file_id = fine_tuner.save_training_files()
print("\nStep 3: Starting fine-tuning...")
print("\nStep 3: Starting fine-tuning job...")
job_id = fine_tuner.start_fine_tuning(train_file_id, validation_file_id)
print("\nStep 4: Monitoring training...")
print("\nStep 4: Setting up Weights and Biases monitoring...")
fine_tuner.add_wandb_sync(job_id)
print("\nStep 5: Monitoring training progress...")
print("This may take several minutes to hours depending on data size...")
model_name = fine_tuner.monitor_training(job_id)
if model_name:
print("\nStep 5: Evaluating model...")
results = fine_tuner.evaluate_model(model_name)
print(f"\nFine-tuning completed! Model: {model_name}")
print("\nResults:")
print(f"Accuracy: {results['accuracy']:.2%}")
print(f"Correct predictions: {results['correct_predictions']}/{results['total_predictions']}")
print("\nStep 6: Evaluating model with Tester class...")
results = fine_tuner.evaluate_model(job_id)
if "error" in results:
print(f"Evaluation failed: {results['error']}")
else:
print(f"{results['message']}")
print(f"Evaluation used {results['test_items']} test items")
print("\nCheck the generated chart for detailed RMSLE metrics!")
print("\nPrice prediction fine-tuning process completed!")
print("\nFollows reference implementation exactly:")
print(" Uses pickle files (train.pkl, test.pkl)")
print(" 200 training examples, 50 validation examples")
print(" Proper RMSLE evaluation using Tester class")
print(" Weights and Biases integration")
print(" Same model and hyperparameters as reference")
print("\nFine-tuning process completed successfully!")
print("\nKey features implemented:")
print(" - Simple data generation")
print(" - Basic token management")
print(" - Training monitoring")
print(" - Model evaluation")
else:
print("\nFine-tuning failed")
print("\nFine-tuning failed - check the error messages above")
except Exception as e:
print(f"\nError during fine-tuning: {e}")
print(f"\nError during fine-tuning process: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()