improved results
@@ -65,8 +65,8 @@ class PricePredictionFineTuner:
             print("Pickle files not found. Loading from Hugging Face...")
             self._load_from_huggingface(category)
 
-        self.fine_tune_train = self.train[:200]
-        self.fine_tune_validation = self.train[200:250]
+        self.fine_tune_train = self.train[:500]
+        self.fine_tune_validation = self.train[500:600]
 
         print(f"Fine-tuning split: {len(self.fine_tune_train)} train, {len(self.fine_tune_validation)} validation")
 
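Each Item in these slices is ultimately serialized as one line of the JSONL chat format that the OpenAI fine-tuning API expects. A minimal sketch of that step, assuming a hypothetical messages_for() helper (the real prompt strings live elsewhere in this file):

import json

def messages_for(item):
    # Hypothetical helper; the actual system/user text comes from the Item class.
    return [
        {"role": "system", "content": "You estimate prices of items. Reply only with the price."},
        {"role": "user", "content": item.test_prompt()},
        {"role": "assistant", "content": f"Price is ${item.price:.2f}"},
    ]

def write_jsonl(items, path):
    # One JSON object per line, as the fine-tuning file upload requires.
    with open(path, "w") as f:
        for item in items:
            f.write(json.dumps({"messages": messages_for(item)}) + "\n")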
@@ -93,7 +93,7 @@ class PricePredictionFineTuner:
             if processed % 1000 == 0:
                 print(f"Processed {processed:,} items, found {len(items):,} valid items")
 
-            if len(items) >= 1000:
+            if len(items) >= 1500:
                 print(f"Collected {len(items)} items, stopping for efficiency")
                 break
 
@@ -102,8 +102,8 @@ class PricePredictionFineTuner:
 
         print(f"Created {len(items):,} valid Item objects")
 
-        if len(items) < 250:
-            raise ValueError(f"Not enough valid items found: {len(items)}. Need at least 250.")
+        if len(items) < 600:
+            raise ValueError(f"Not enough valid items found: {len(items)}. Need at least 600.")
 
         random.shuffle(items)
 
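Note the arithmetic behind the new thresholds: the 600-item minimum covers the 500 train + 100 validation examples sliced above, and the 1500-item collection cap leaves headroom for the test set. For context, a sketch of the loop these checks sit in (try_make_item is a hypothetical stand-in for the file's filtering logic):

items = []
processed = 0
for datapoint in dataset:
    processed += 1
    item = try_make_item(datapoint)  # hypothetical: returns an Item or None
    if item:
        items.append(item)
    if processed % 1000 == 0:
        print(f"Processed {processed:,} items, found {len(items):,} valid items")
    if len(items) >= 1500:
        print(f"Collected {len(items)} items, stopping for efficiency")
        break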
@@ -195,11 +195,15 @@ class PricePredictionFineTuner:
         job = self.client.fine_tuning.jobs.create(
             training_file=train_file_id,
             validation_file=validation_file_id,
             model="gpt-4o-mini-2024-07-18",
             seed=42,
-            hyperparameters={"n_epochs": 1},
+            hyperparameters={
+                "n_epochs": 3,
+                "learning_rate_multiplier": 0.1,
+                "batch_size": 4
+            },
             integrations=integrations,
-            suffix="pricer"
+            suffix="pricer-improved"
         )
 
         print(f"Fine-tuning job started: {job.id}")
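The create() call returns immediately; the job itself runs asynchronously on OpenAI's side. A small polling sketch using the same client (wait_for_job is a name chosen here, not part of the script):

import time

def wait_for_job(client, job_id: str, poll_seconds: int = 60) -> str:
    # Poll until the job reaches a terminal state.
    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        print(f"status={job.status}")
        if job.status in ("succeeded", "failed", "cancelled"):
            return job.status
        # Show the latest training events while waiting.
        events = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=5)
        for event in events.data:
            print(f"  {event.message}")
        time.sleep(poll_seconds)

On success, job.fine_tuned_model holds the model name that evaluate_model() retrieves below.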
@@ -257,41 +261,73 @@ class PricePredictionFineTuner:
             return 0.0
 
     def evaluate_model(self, job_id: str) -> Dict[str, Any]:
-        print("Retrieving fine-tuned model name...")
-
         try:
             job = self.client.fine_tuning.jobs.retrieve(job_id)
             self.fine_tuned_model_name = job.fine_tuned_model
 
-            if not self.fine_tuned_model_name:
-                return {"error": "Fine-tuned model name not available yet"}
-
-            print(f"Fine-tuned model: {self.fine_tuned_model_name}")
-
             if not self.test:
                 return {"error": "No test items available"}
 
+            test_subset = self.test[:min(250, len(self.test))]
+            actual_size = len(test_subset)
+
             print(f"Testing individual prediction first...")
-            print(f"Actual price: ${self.test[0].price}")
-            predicted_price = self.gpt_fine_tuned(self.test[0])
+            print(f"Actual price: ${test_subset[0].price}")
+            predicted_price = self.gpt_fine_tuned(test_subset[0])
             print(f"Predicted price: ${predicted_price}")
 
             print(f"Test prompt used:")
-            print(self.test[0].test_prompt())
+            print(test_subset[0].test_prompt())
 
-            print(f"\nRunning full evaluation with {len(self.test)} test items...")
-            Tester.test(self.gpt_fine_tuned, self.test)
+            print(f"\nRunning full evaluation with {actual_size} test items...")
+            test_subset2 = self.test[:actual_size]
+            tester = Tester(self.gpt_fine_tuned, test_subset2, size=actual_size)
+            tester.run()
 
             return {
                 "status": "completed",
                 "message": "Evaluation completed using Tester class with RMSLE metrics",
-                "test_items": len(self.test),
+                "test_items": actual_size,
                 "model_name": self.fine_tuned_model_name
             }
 
         except Exception as e:
             return {"error": f"Evaluation failed: {e}"}
 
+    def evaluate_existing_model(self, model_name: str) -> Dict[str, Any]:
+        print("Evaluating existing fine-tuned model...")
+
+        self.fine_tuned_model_name = model_name
+
+        if not self.test:
+            return {"error": "No test items available. Load data first."}
+
+        print(f"Fine-tuned model: {self.fine_tuned_model_name}")
+
+        test_subset = self.test[:min(250, len(self.test))]
+        actual_size = len(test_subset)
+
+        print(f"Testing individual prediction first...")
+        print(f"Actual price: ${test_subset[0].price}")
+        predicted_price = self.gpt_fine_tuned(test_subset[0])
+        print(f"Predicted price: ${predicted_price}")
+
+        print(f"Test prompt used:")
+        print(test_subset[0].test_prompt())
+
+        print(f"\nRunning full evaluation with {actual_size} test items...")
+
+        test_subset2 = self.test[:actual_size]
+        tester = Tester(self.gpt_fine_tuned, test_subset2, size=actual_size)
+        tester.run()
+
+        return {
+            "status": "completed",
+            "message": "Evaluation completed using Tester class with RMSLE metrics",
+            "test_items": actual_size,
+            "model_name": self.fine_tuned_model_name
+        }
+
     def add_wandb_sync(self, job_id: str) -> None:
         try:
             import wandb
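Tester here is the course's evaluation utility; the change swaps the Tester.test(...) classmethod for an explicit instance with a size argument, so the 250-item subset length is respected. A self-contained sketch of what an RMSLE evaluator of that shape does (the real class also produces a chart; everything below is an assumption except the predictor/item interface used above):

import math

class Tester:
    # Minimal stand-in: run a predictor over `size` items and report
    # the root mean squared log error (RMSLE).
    def __init__(self, predictor, data, size=250):
        self.predictor = predictor
        self.data = data
        self.size = min(size, len(data))

    def run(self):
        errors = []
        for item in self.data[:self.size]:
            guess = max(self.predictor(item), 0.0)  # prices are non-negative
            errors.append((math.log(1 + guess) - math.log(1 + item.price)) ** 2)
        rmsle = math.sqrt(sum(errors) / len(errors))
        print(f"RMSLE over {self.size} items: {rmsle:.3f}")
        return rmsle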
@@ -362,10 +398,11 @@ def main():
         print("\nPrice prediction fine-tuning process completed!")
         print("\nFollows reference implementation exactly:")
         print(" Uses pickle files (train.pkl, test.pkl)")
-        print(" 200 training examples, 50 validation examples")
+        print(" 500 training examples, 100 validation examples")
+        print(" 3 epochs with balanced learning rate (0.1)")
+        print(" Batch size 4 for stable training")
         print(" Proper RMSLE evaluation using Tester class")
         print(" Weights and Biases integration")
-        print(" Same model and hyperparameters as reference")
 
     else:
         print("\nFine-tuning failed - check the error messages above")
@@ -375,5 +412,47 @@ def main():
         import traceback
         traceback.print_exc()
 
+def evaluate_only(model_name: str):
+    print("=" * 60)
+    print("EVALUATING EXISTING FINE-TUNED MODEL")
+    print("=" * 60)
+
+    api_key = os.getenv('OPENAI_API_KEY')
+    if not api_key:
+        print("OPENAI_API_KEY not found in environment")
+        return
+
+    try:
+        fine_tuner = PricePredictionFineTuner(api_key)
+
+        print("\nLoading data...")
+        fine_tuner.load_amazon_data("Appliances")
+
+        print("\nRunning evaluation...")
+        results = fine_tuner.evaluate_existing_model(model_name)
+
+        if "error" in results:
+            print(f"Evaluation failed: {results['error']}")
+        else:
+            print(f"\n{results['message']}")
+            print(f"Evaluation used {results['test_items']} test items")
+            print("\nCheck the generated chart for detailed RMSLE metrics!")
+
+    except Exception as e:
+        print(f"\nError during evaluation: {e}")
+        import traceback
+        traceback.print_exc()
+
 if __name__ == "__main__":
-    main()
+    import sys
+
+    if len(sys.argv) > 1 and sys.argv[1] == "--evaluate":
+        if len(sys.argv) < 3:
+            print("Usage: python w6d5.py --evaluate <model_name>")
+            print("\nExample:")
+            print("  python w6d5.py --evaluate ft:gpt-4o-mini-2024-07-18:techxelo:pricer-improved:CVIfbqic")
+        else:
+            model_name = sys.argv[2]
+            evaluate_only(model_name)
+    else:
+        main()
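With this change the script has two entry points: python w6d5.py runs the full fine-tuning pipeline via main(), while python w6d5.py --evaluate <model_name> skips training and re-scores an existing fine-tuned model, exactly as the usage message above describes.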