Files
LLM_Engineering_OLD/week6/community-contributions/Exercise_week6_jom.ipynb
2025-10-24 17:45:28 +02:00

373 lines
13 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "168f6f43",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import math\n",
"import os\n",
"import pickle\n",
"import random\n",
"import re\n",
"import time\n",
"from collections import Counter\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from anthropic import Anthropic\n",
"from dotenv import load_dotenv\n",
"from huggingface_hub import login\n",
"from openai import OpenAI\n",
"\n",
"# Environment: load variables from .env (overriding any already set) and make\n",
"# sure each key exists, falling back to a placeholder value when it is missing.\n",
"load_dotenv(override=True)\n",
"for key_name in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'HF_TOKEN'):\n",
"    os.environ[key_name] = os.getenv(key_name, 'your-key-if-not-using-env')\n",
"\n",
"# Log in to the Hugging Face Hub; the token is also added to the git credentials.\n",
"hf_token = os.environ['HF_TOKEN']\n",
"login(hf_token, add_to_git_credential=True)\n",
"\n",
"# Project-local helpers (items.py / testing.py are expected on the import path).\n",
"from items import Item\n",
"from testing import Tester\n",
"\n",
"# Client used below for file uploads, fine-tuning jobs and chat completions.\n",
"openai = OpenAI()\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b990ccf1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Load the pickled train/test collections.\n",
"# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.\n",
"with open('train.pkl', 'rb') as train_handle:\n",
"    train = pickle.load(train_handle)\n",
"\n",
"with open('test.pkl', 'rb') as test_handle:\n",
"    test = pickle.load(test_handle)\n",
"\n",
"# Small fine-tuning split: the first 200 training items, then the next 50 for validation.\n",
"fine_tune_train, fine_tune_validation = train[:200], train[200:250]\n",
"\n",
"\n",
"def messages_for(item):\n",
"    \"\"\"Build the chat messages for one fine-tuning example.\n",
"\n",
"    The user turn is item.test_prompt() with the nearest-dollar hint and the\n",
"    trailing price cue removed; the assistant turn states item.price with\n",
"    two decimals.\n",
"    \"\"\"\n",
"    question = item.test_prompt()\n",
"    for fragment in (\" to the nearest dollar\", \"\\n\\nPrice is $\"):\n",
"        question = question.replace(fragment, \"\")\n",
"    turns = (\n",
"        (\"system\", \"You estimate prices of items. Reply only with the price, no explanation\"),\n",
"        (\"user\", question),\n",
"        (\"assistant\", f\"Price is ${item.price:.2f}\"),\n",
"    )\n",
"    return [{\"role\": role, \"content\": content} for role, content in turns]\n",
"\n",
"def make_jsonl(items):\n",
"    \"\"\"Render items as JSONL text, one {\"messages\": [...]} object per line.\n",
"\n",
"    Lines are newline-separated with no trailing newline; no items gives an\n",
"    empty string.\n",
"    \"\"\"\n",
"    records = [json.dumps({\"messages\": messages_for(entry)}) for entry in items]\n",
"    return \"\\n\".join(records)\n",
"\n",
"\n",
"def write_jsonl(items, filename):\n",
"    \"\"\"Write the JSONL rendering of items to filename, replacing any existing file.\"\"\"\n",
"    with open(filename, \"w\") as out_file:\n",
"        out_file.write(make_jsonl(items))\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "f0d128e2",
"metadata": {},
"source": [
"# Trained too fast\n",
"It resulted in overfitting (the validation loss jumped around and was about 4x larger), although accuracy stayed constant. \n",
"Epochs: 2 Batch size: 16 LR multiplier:0.1\n",
"\n",
"Lots of errors, which in hindsight may come from parsing the output (didn't check). \n",
"**Metrics**: $153, RMSLE 3.6 Hits 31% "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8cce151",
"metadata": {},
"outputs": [],
"source": [
"write_jsonl(fine_tune_train, \"fine_tune_train.jsonl\")\n",
"write_jsonl(fine_tune_validation, \"fine_tune_validation.jsonl\")\n",
"\n",
"# Upload both files so the fine-tuning job can reference them by id.\n",
"with open(\"fine_tune_train.jsonl\", \"rb\") as f:\n",
"    train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
"with open(\"fine_tune_validation.jsonl\", \"rb\") as f:\n",
"    validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
"\n",
"# Weights & Biases integration, reused by the later fine-tuning jobs.\n",
"wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer\"}}\n",
"\n",
"# NOTE(review): the markdown cell above quotes Epochs: 2 / Batch size: 16 /\n",
"# LR multiplier: 0.1 while this job requests 5 epochs - confirm which settings\n",
"# produced the reported metrics.\n",
"# Keep the returned job: its id is required to look the job up again.\n",
"job = openai.fine_tuning.jobs.create(\n",
"    training_file=train_file.id,\n",
"    validation_file=validation_file.id,\n",
"    model=\"gpt-4o-mini-2024-07-18\",\n",
"    seed=42,\n",
"    hyperparameters={\"n_epochs\": 5},\n",
"    integrations = [wandb_integration],\n",
"    suffix=\"pricer_v1\"\n",
")\n",
"job_id = job.id\n",
"\n",
"# fine_tuned_model stays None until the job has finished, so poll (every 30 s)\n",
"# until a terminal status is reached and fail loudly if training did not succeed.\n",
"while job.status not in (\"succeeded\", \"failed\", \"cancelled\"):\n",
"    time.sleep(30)\n",
"    job = openai.fine_tuning.jobs.retrieve(job_id)\n",
"if job.status != \"succeeded\":\n",
"    raise RuntimeError(f\"Fine-tuning job {job_id} ended with status {job.status!r}\")\n",
"\n",
"fine_tuned_model_name_hpo = job.fine_tuned_model\n",
"# The prompt\n",
"\n",
"def messages_for_test(item):\n",
"    \"\"\"Build the inference-time chat messages for an item.\n",
"\n",
"    The system and user turns match messages_for; the assistant turn is only\n",
"    the \"Price is $\" prefix, with no price filled in.\n",
"    \"\"\"\n",
"    question = item.test_prompt()\n",
"    for fragment in (\" to the nearest dollar\", \"\\n\\nPrice is $\"):\n",
"        question = question.replace(fragment, \"\")\n",
"    system_turn = {\"role\": \"system\", \"content\": \"You estimate prices of items. Reply only with the price, no explanation\"}\n",
"    user_turn = {\"role\": \"user\", \"content\": question}\n",
"    assistant_turn = {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
"    return [system_turn, user_turn, assistant_turn]\n",
"# A utility function to extract the price from a string\n",
"\n",
"def get_price(s):\n",
"    \"\"\"Extract the first number from a model reply and return it as a float.\n",
"\n",
"    Dollar signs and thousands separators are removed before searching.\n",
"    Returns 0.0 when the reply is None or empty, or contains no number, so a\n",
"    blank completion is scored as a $0 guess instead of raising.\n",
"    \"\"\"\n",
"    if not s:\n",
"        return 0.0\n",
"    s = s.replace('$','').replace(',','')\n",
"    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
"    return float(match.group()) if match else 0.0\n",
"\n",
"# The function for gpt-4o-mini\n",
"\n",
"def gpt_fine_tuned(item):\n",
"    \"\"\"Ask the first fine-tuned model for a price and parse the number from its reply.\"\"\"\n",
"    request = dict(\n",
"        model=fine_tuned_model_name_hpo,\n",
"        messages=messages_for_test(item),\n",
"        seed=42,\n",
"        max_tokens=7,\n",
"    )\n",
"    completion = openai.chat.completions.create(**request)\n",
"    return get_price(completion.choices[0].message.content)\n",
"\n",
"# Run the Tester harness (from the local testing module) with this predictor over the test items.\n",
"Tester.test(gpt_fine_tuned, test)"
]
},
{
"cell_type": "markdown",
"id": "43716422",
"metadata": {},
"source": [
"# Same OP model, but with nicer prompting ONLY at inference\n",
"It fixed the \\$0 prices, driving the metrics to: \n",
"**Metrics**: \\$88, RMSLE 0.59 Hits 50% "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c624cade",
"metadata": {},
"outputs": [],
"source": [
"def modified_messages_for_test(item):\n",
"    \"\"\"Build inference messages using the longer, more explicit pricing prompt.\n",
"\n",
"    Returns system, user and assistant turns; the assistant turn is only the\n",
"    \"Price is $\" prefix, with no price filled in.\n",
"    \"\"\"\n",
"    # Adjacent string literals are joined with nothing in between, so every\n",
"    # sentence but the last ends with its own period and space.\n",
"    system_message = (\n",
"        \"You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases. \"\n",
"        \"Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging. \"\n",
"        \"Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace. \"\n",
"        \"Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text. \"\n",
"        \"Price cannot be zero, always make sensible assumptions.\"\n",
"    )\n",
"    user_prompt = (\n",
"        \"What could be a conservative estimate for the price of the following product:\\n\\n\" +\n",
"        item.test_prompt().replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n",
"    )\n",
"    return [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": user_prompt},\n",
"        {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
"    ]\n",
"\n",
"\n",
"def gpt_fine_tuned(item):\n",
"    \"\"\"Price an item with the first fine-tuned model, using the richer inference prompt.\n",
"\n",
"    The model is fine_tuned_model_name_hpo, the 5-epoch job created earlier in\n",
"    this notebook; no fine_tuned_model_name_epoch5 variable is ever assigned.\n",
"    Note that this definition shadows the earlier gpt_fine_tuned.\n",
"    \"\"\"\n",
"    response = openai.chat.completions.create(\n",
"        model=fine_tuned_model_name_hpo,\n",
"        messages=modified_messages_for_test(item),\n",
"        seed=42,\n",
"        max_tokens=7\n",
"    )\n",
"    reply = response.choices[0].message.content\n",
"    return get_price(reply)\n",
"\n",
"# Run the Tester harness (from the local testing module) with the redefined predictor over the test items.\n",
"Tester.test(gpt_fine_tuned, test)"
]
},
{
"cell_type": "markdown",
"id": "892b06e3",
"metadata": {},
"source": [
"# Trying to fix overfitting, setting new HPO and prompting on training \n",
"Epochs:1 Batch size:1 LR multiplier:0.01 \n",
"Didn't make a noticeable difference \n",
"**Metrics**: $89, RMSLE 0.56 Hits 50% \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "662870a8",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def modified_messages_for(item):\n",
"    \"\"\"Build one fine-tuning example that uses the same prompt as inference.\n",
"\n",
"    The system and user turns are taken from modified_messages_for_test so the\n",
"    training and inference prompts cannot drift apart; only the assistant turn\n",
"    differs, stating item.price with two decimals.\n",
"    \"\"\"\n",
"    # Drop the inference-time assistant prefix and replace it with the answer.\n",
"    prompt_turns = modified_messages_for_test(item)[:-1]\n",
"    answer_turn = {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
"    return prompt_turns + [answer_turn]\n",
"\n",
"def modified_make_jsonl(items):\n",
"    \"\"\"Render items as JSONL text using the modified prompt, one object per line.\n",
"\n",
"    Lines are newline-separated with no trailing newline; no items gives an\n",
"    empty string.\n",
"    \"\"\"\n",
"    records = [json.dumps({\"messages\": modified_messages_for(entry)}) for entry in items]\n",
"    return \"\\n\".join(records)\n",
"\n",
"def modified_write_jsonl(items, filename):\n",
"    \"\"\"Write the modified-prompt JSONL for items to filename, replacing any existing file.\"\"\"\n",
"    with open(filename, \"w\") as out_file:\n",
"        out_file.write(modified_make_jsonl(items))\n",
"\n",
"modified_write_jsonl(fine_tune_train, \"mod_fine_tune_train.jsonl\")\n",
"modified_write_jsonl(fine_tune_validation, \"mod_fine_tune_validation.jsonl\")\n",
"\n",
"# Upload the modified-prompt files so the job can reference them by id.\n",
"with open(\"mod_fine_tune_train.jsonl\", \"rb\") as f:\n",
"    mod_train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
"with open(\"mod_fine_tune_validation.jsonl\", \"rb\") as f:\n",
"    mod_validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
"\n",
"# Settings for this experiment as stated in the markdown above:\n",
"# 1 epoch, batch size 1, LR multiplier 0.01.\n",
"# Keep the returned job so job_id is defined for the retrieve call that follows.\n",
"job = openai.fine_tuning.jobs.create(\n",
"    training_file=mod_train_file.id,\n",
"    validation_file=mod_validation_file.id,\n",
"    model=\"gpt-4o-mini-2024-07-18\",\n",
"    seed=42,\n",
"    hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\": 0.01, \"batch_size\": 1},\n",
"    integrations = [wandb_integration],\n",
"    suffix=\"pricer_v3\"\n",
")\n",
"job_id = job.id\n",
"job"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7d14e01",
"metadata": {},
"outputs": [],
"source": [
"# fine_tuned_model is None until the job has finished, so poll (every 30 s)\n",
"# until a terminal status is reached and fail loudly if training did not succeed.\n",
"job = openai.fine_tuning.jobs.retrieve(job_id)\n",
"while job.status not in (\"succeeded\", \"failed\", \"cancelled\"):\n",
"    time.sleep(30)\n",
"    job = openai.fine_tuning.jobs.retrieve(job_id)\n",
"if job.status != \"succeeded\":\n",
"    raise RuntimeError(f\"Fine-tuning job {job_id} ended with status {job.status!r}\")\n",
"fine_tuned_model_name_prompt_train = job.fine_tuned_model\n",
"\n",
"\n",
"def mod_gpt_fine_tuned(item):\n",
"    \"\"\"Predict a price with the model fine-tuned on the modified prompt.\"\"\"\n",
"    completion = openai.chat.completions.create(\n",
"        model=fine_tuned_model_name_prompt_train,\n",
"        messages=modified_messages_for_test(item),\n",
"        seed=42,\n",
"        max_tokens=7,\n",
"    )\n",
"    first_choice = completion.choices[0]\n",
"    return get_price(first_choice.message.content)\n",
"\n",
"# Run the Tester harness (from the local testing module) with this predictor over the test items.\n",
"Tester.test(mod_gpt_fine_tuned, test)"
]
},
{
"cell_type": "markdown",
"id": "4fbedd53",
"metadata": {},
"source": [
"# Last model: trying to achieve faster convergence\n",
"Epochs:1 Batch size:1 LR multiplier:1 \n",
"**Metrics**: $87, RMSLE 0.59 Hits 47% \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b78f3b4",
"metadata": {},
"outputs": [],
"source": [
"# Settings for this experiment: 1 epoch, batch size 1, LR multiplier 1.\n",
"# NOTE(review): the suffix \"pricer_v3\" is also used by the previous job;\n",
"# consider distinct suffixes so the two models can be told apart.\n",
"# Keep the returned job so job_id refers to this job in the retrieve call that follows.\n",
"job = openai.fine_tuning.jobs.create(\n",
"    training_file=mod_train_file.id,\n",
"    validation_file=mod_validation_file.id,\n",
"    model=\"gpt-4o-mini-2024-07-18\",\n",
"    seed=42,\n",
"    hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\":1., \"batch_size\":1},\n",
"    integrations = [wandb_integration],\n",
"    suffix=\"pricer_v3\"\n",
")\n",
"job_id = job.id\n",
"job"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6da5f2d5",
"metadata": {},
"outputs": [],
"source": [
"# fine_tuned_model is None until the job has finished, so poll (every 30 s)\n",
"# until a terminal status is reached and fail loudly if training did not succeed.\n",
"job = openai.fine_tuning.jobs.retrieve(job_id)\n",
"while job.status not in (\"succeeded\", \"failed\", \"cancelled\"):\n",
"    time.sleep(30)\n",
"    job = openai.fine_tuning.jobs.retrieve(job_id)\n",
"if job.status != \"succeeded\":\n",
"    raise RuntimeError(f\"Fine-tuning job {job_id} ended with status {job.status!r}\")\n",
"fine_tuned_model_name_prompt_train_lr = job.fine_tuned_model\n",
"\n",
"def mod_gpt_fine_tuned_v2(item):\n",
"    \"\"\"Predict a price with the LR-multiplier-1 model, using the modified inference prompt.\"\"\"\n",
"    call_kwargs = {\n",
"        \"model\": fine_tuned_model_name_prompt_train_lr,\n",
"        \"messages\": modified_messages_for_test(item),\n",
"        \"seed\": 42,\n",
"        \"max_tokens\": 7,\n",
"    }\n",
"    answer_text = openai.chat.completions.create(**call_kwargs).choices[0].message.content\n",
"    return get_price(answer_text)\n",
"\n",
"# Run the Tester harness (from the local testing module) with this predictor over the test items.\n",
"Tester.test(mod_gpt_fine_tuned_v2, test)"
]
},
{
"cell_type": "markdown",
"id": "19febde6",
"metadata": {},
"source": [
"## Summary\n",
"For this model in particular, prompting seems far more important than the fine-tuning itself.\n",
"We tried training for longer, which led to overfitting. We then fixed the overfitting, with and without the improved prompt in the training inputs, and the results remained essentially unchanged."
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}