From 178be348b1d2c0697c62165798d6f7b8e2ff4657 Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 24 Oct 2025 17:45:28 +0200
Subject: [PATCH] Added Exercise week6

---
 .../Exercise_week6_jom.ipynb | 372 ++++++++++++++++++
 1 file changed, 372 insertions(+)
 create mode 100644 week6/community-contributions/Exercise_week6_jom.ipynb

diff --git a/week6/community-contributions/Exercise_week6_jom.ipynb b/week6/community-contributions/Exercise_week6_jom.ipynb
new file mode 100644
index 0000000..7927e86
--- /dev/null
+++ b/week6/community-contributions/Exercise_week6_jom.ipynb
@@ -0,0 +1,372 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "168f6f43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import re\n",
+    "import math\n",
+    "import json\n",
+    "import random\n",
+    "from dotenv import load_dotenv\n",
+    "from huggingface_hub import login\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pickle\n",
+    "from collections import Counter\n",
+    "from openai import OpenAI\n",
+    "from anthropic import Anthropic\n",
+    "\n",
+    "# environment\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
+    "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
+    "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
+    "\n",
+    "hf_token = os.environ['HF_TOKEN']\n",
+    "login(hf_token, add_to_git_credential=True)\n",
+    "\n",
+    "\n",
+    "from items import Item\n",
+    "from testing import Tester\n",
+    "\n",
+    "openai = OpenAI()\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b990ccf1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "with open('train.pkl', 'rb') as file:\n",
+    "    train = pickle.load(file)\n",
+    "\n",
+    "with open('test.pkl', 'rb') as file:\n",
+    "    test = pickle.load(file)\n",
+    "\n",
+    "\n",
+    "fine_tune_train = train[:200]\n",
+    "fine_tune_validation = train[200:250]\n",
+    "\n",
+    "\n",
+    "def messages_for(item):\n",
+    "    system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
+    "    user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
+    "    ]\n",
+    "\n",
+    "def make_jsonl(items):\n",
+    "    result = \"\"\n",
+    "    for item in items:\n",
+    "        messages = messages_for(item)\n",
+    "        messages_str = json.dumps(messages)\n",
+    "        result += '{\"messages\": ' + messages_str +'}\\n'\n",
+    "    return result.strip()\n",
+    "\n",
+    "\n",
+    "def write_jsonl(items, filename):\n",
+    "    with open(filename, \"w\") as f:\n",
+    "        jsonl = make_jsonl(items)\n",
+    "        f.write(jsonl)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f0d128e2",
+   "metadata": {},
+   "source": [
+    "# Trained too fast\n",
+    "It resulted in overfitting (validation loss jumped around wildly, roughly 4x larger than the training loss) although accuracy stayed constant.  \n",
+    "Epochs: 2, Batch size: 16, LR multiplier: 0.1\n",
+    "\n",
+    "Lots of errors, which in hindsight may have come from parsing the model output (not checked).  \n",
+    "**Metrics**: error $153, RMSLE 3.6, Hits 31%"
+   ]
+  },
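+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "rmsle-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Editor's sketch (not part of the original notebook): the notes above quote an average\n",
+    "# dollar error, an RMSLE and a hit rate, presumably computed by the Tester used in this\n",
+    "# notebook. For reference, this is the usual way RMSLE and a tolerance-based hit rate are\n",
+    "# defined, assuming y_true and y_pred are arrays of true and predicted prices; the $40\n",
+    "# tolerance is an arbitrary illustrative choice, not necessarily what Tester uses.\n",
+    "def rmsle(y_true, y_pred):\n",
+    "    y_true, y_pred = np.array(y_true), np.array(y_pred)\n",
+    "    return float(np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2)))\n",
+    "\n",
+    "def hit_rate(y_true, y_pred, tolerance=40):\n",
+    "    y_true, y_pred = np.array(y_true), np.array(y_pred)\n",
+    "    return float(np.mean(np.abs(y_pred - y_true) <= tolerance))\n"
+   ]
+  },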
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f8cce151",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "write_jsonl(fine_tune_train, \"fine_tune_train.jsonl\")\n",
+    "write_jsonl(fine_tune_validation, \"fine_tune_validation.jsonl\")\n",
+    "\n",
+    "with open(\"fine_tune_train.jsonl\", \"rb\") as f:\n",
+    "    train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
+    "with open(\"fine_tune_validation.jsonl\", \"rb\") as f:\n",
+    "    validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
+    "\n",
+    "wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer\"}}\n",
+    "\n",
+    "job = openai.fine_tuning.jobs.create(\n",
+    "    training_file=train_file.id,\n",
+    "    validation_file=validation_file.id,\n",
+    "    model=\"gpt-4o-mini-2024-07-18\",\n",
+    "    seed=42,\n",
+    "    hyperparameters={\"n_epochs\": 5},\n",
+    "    integrations = [wandb_integration],\n",
+    "    suffix=\"pricer_v1\"\n",
+    ")\n",
+    "\n",
+    "# the fine-tuned model name is only available once the job above has finished\n",
+    "fine_tuned_model_name_hpo = openai.fine_tuning.jobs.retrieve(job.id).fine_tuned_model\n",
+    "# The prompt\n",
+    "\n",
+    "def messages_for_test(item):\n",
+    "    system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
+    "    user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
+    "    ]\n",
+    "# A utility function to extract the price from a string\n",
+    "\n",
+    "def get_price(s):\n",
+    "    s = s.replace('$','').replace(',','')\n",
+    "    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
+    "    return float(match.group()) if match else 0\n",
+    "\n",
+    "# The function for gpt-4o-mini\n",
+    "\n",
+    "def gpt_fine_tuned(item):\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=fine_tuned_model_name_hpo,\n",
+    "        messages=messages_for_test(item),\n",
+    "        seed=42,\n",
+    "        max_tokens=7\n",
+    "    )\n",
+    "    reply = response.choices[0].message.content\n",
+    "    return get_price(reply)\n",
+    "\n",
+    "Tester.test(gpt_fine_tuned, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "43716422",
+   "metadata": {},
+   "source": [
+    "# Same model as above, but with nicer prompting ONLY at inference\n",
+    "It fixed the $0 prices, markedly improving the results.  \n",
+    "**Metrics**: error $88, RMSLE 0.59, Hits 50%"
+   ]
+  },
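+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "zero-price-check",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Editor's sketch (not part of the original notebook): a quick spot-check of the\n",
+    "# \"$0 prices\" observation above -- count how many of the first few test items the\n",
+    "# terse prompt plus get_price() parse to a zero price. Uses gpt_fine_tuned and\n",
+    "# get_price from the cell above; the sample size of 25 is arbitrary.\n",
+    "sample = test[:25]\n",
+    "zero_replies = sum(1 for item in sample if gpt_fine_tuned(item) == 0)\n",
+    "print(f\"{zero_replies}/{len(sample)} items parsed to $0\")\n"
+   ]
+  },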
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c624cade",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def modified_messages_for_test(item):\n",
+    "    system_message = (\n",
+    "        \"You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases. \"\n",
+    "        \"Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging. \"\n",
+    "        \"Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace. \"\n",
+    "        \"Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text. \"\n",
+    "        \"Price cannot be zero, always make sensible assumptions.\"\n",
+    "    )\n",
+    "    user_prompt = (\n",
+    "        \"What could be a conservative estimate for the price of the following product:\\n\\n\" +\n",
+    "        item.test_prompt().replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n",
+    "    )\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
+    "    ]\n",
+    "\n",
+    "\n",
+    "def gpt_fine_tuned(item):\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=fine_tuned_model_name_hpo,  # same fine-tuned model as above\n",
+    "        messages=modified_messages_for_test(item),\n",
+    "        seed=42,\n",
+    "        max_tokens=7\n",
+    "    )\n",
+    "    reply = response.choices[0].message.content\n",
+    "    return get_price(reply)\n",
+    "\n",
+    "Tester.test(gpt_fine_tuned, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "892b06e3",
+   "metadata": {},
+   "source": [
+    "# Trying to fix overfitting, with new hyperparameters and the richer prompt in the training data\n",
+    "Epochs: 1, Batch size: 1, LR multiplier: 0.01  \n",
+    "Didn't make a noticeable difference  \n",
+    "**Metrics**: error $89, RMSLE 0.56, Hits 50%\n",
+    "\n"
+   ]
+  },
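+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "job-events-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Editor's sketch (not part of the original notebook): besides the Weights & Biases\n",
+    "# charts, the overfitting question can be eyeballed from the train/validation loss\n",
+    "# messages that the fine-tuning job emits. Assumes `job` is the FineTuningJob object\n",
+    "# returned by openai.fine_tuning.jobs.create in the earlier training cell.\n",
+    "events = openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=30)\n",
+    "for event in reversed(events.data):\n",
+    "    print(event.message)\n"
+   ]
+  },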
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "662870a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def modified_messages_for(item):\n",
+    "    system_message = (\n",
+    "        \"You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases. \"\n",
+    "        \"Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging. \"\n",
+    "        \"Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace. \"\n",
+    "        \"Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text. \"\n",
+    "        \"Price cannot be zero, always make sensible assumptions.\"\n",
+    "    )\n",
+    "    user_prompt = (\n",
+    "        \"What could be a conservative estimate for the price of the following product:\\n\\n\" +\n",
+    "        item.test_prompt().replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n",
+    "    )\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
+    "    ]\n",
+    "\n",
+    "def modified_make_jsonl(items):\n",
+    "    result = \"\"\n",
+    "    for item in items:\n",
+    "        messages = modified_messages_for(item)\n",
+    "        messages_str = json.dumps(messages)\n",
+    "        result += '{\"messages\": ' + messages_str +'}\\n'\n",
+    "    return result.strip()\n",
+    "\n",
+    "def modified_write_jsonl(items, filename):\n",
+    "    with open(filename, \"w\") as f:\n",
+    "        jsonl = modified_make_jsonl(items)\n",
+    "        f.write(jsonl)\n",
+    "\n",
+    "modified_write_jsonl(fine_tune_train, \"mod_fine_tune_train.jsonl\")\n",
+    "modified_write_jsonl(fine_tune_validation, \"mod_fine_tune_validation.jsonl\")\n",
+    "\n",
+    "\n",
+    "with open(\"mod_fine_tune_train.jsonl\", \"rb\") as f:\n",
+    "    mod_train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
+    "with open(\"mod_fine_tune_validation.jsonl\", \"rb\") as f:\n",
+    "    mod_validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
+    "\n",
+    "# hyperparameters described above: 1 epoch, batch size 1, LR multiplier 0.01\n",
+    "mod_job = openai.fine_tuning.jobs.create(\n",
+    "    training_file=mod_train_file.id,\n",
+    "    validation_file=mod_validation_file.id,\n",
+    "    model=\"gpt-4o-mini-2024-07-18\",\n",
+    "    seed=42,\n",
+    "    hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\": 0.01, \"batch_size\": 1},\n",
+    "    integrations = [wandb_integration],\n",
+    "    suffix=\"pricer_v2\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7d14e01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# once the pricer_v2 job has finished\n",
+    "fine_tuned_model_name_prompt_train = openai.fine_tuning.jobs.retrieve(mod_job.id).fine_tuned_model\n",
+    "\n",
+    "\n",
+    "def mod_gpt_fine_tuned(item):\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=fine_tuned_model_name_prompt_train,\n",
+    "        messages=modified_messages_for_test(item),\n",
+    "        seed=42,\n",
+    "        max_tokens=7\n",
+    "    )\n",
+    "    reply = response.choices[0].message.content\n",
+    "    return get_price(reply)\n",
+    "\n",
+    "Tester.test(mod_gpt_fine_tuned, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fbedd53",
+   "metadata": {},
+   "source": [
+    "# Last model, trying to achieve faster convergence\n",
+    "Epochs: 1, Batch size: 1, LR multiplier: 1  \n",
+    "**Metrics**: error $87, RMSLE 0.59, Hits 47%\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b78f3b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mod_job_lr = openai.fine_tuning.jobs.create(\n",
+    "    training_file=mod_train_file.id,\n",
+    "    validation_file=mod_validation_file.id,\n",
+    "    model=\"gpt-4o-mini-2024-07-18\",\n",
+    "    seed=42,\n",
+    "    hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\": 1., \"batch_size\": 1},\n",
+    "    integrations = [wandb_integration],\n",
+    "    suffix=\"pricer_v3\"\n",
+    ")"
+   ]
+  },
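+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "wait-for-job-sketch",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Editor's sketch (not part of the original notebook): jobs.retrieve() only reports a\n",
+    "# fine_tuned_model name once the job has actually finished, so a small polling helper\n",
+    "# like this one could replace the manual retrieve in the next cell. The helper name and\n",
+    "# the 60-second poll interval are arbitrary choices.\n",
+    "import time\n",
+    "\n",
+    "def wait_for_model(job_id, poll_seconds=60):\n",
+    "    while True:\n",
+    "        job_status = openai.fine_tuning.jobs.retrieve(job_id)\n",
+    "        if job_status.status == \"succeeded\":\n",
+    "            return job_status.fine_tuned_model\n",
+    "        if job_status.status in (\"failed\", \"cancelled\"):\n",
+    "            raise RuntimeError(f\"Fine-tuning job ended with status {job_status.status}\")\n",
+    "        time.sleep(poll_seconds)\n",
+    "\n",
+    "# e.g. fine_tuned_model_name_prompt_train_lr = wait_for_model(mod_job_lr.id)\n"
+   ]
+  },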
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6da5f2d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# once the pricer_v3 job has finished\n",
+    "fine_tuned_model_name_prompt_train_lr = openai.fine_tuning.jobs.retrieve(mod_job_lr.id).fine_tuned_model\n",
+    "\n",
+    "def mod_gpt_fine_tuned_v2(item):\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=fine_tuned_model_name_prompt_train_lr,\n",
+    "        messages=modified_messages_for_test(item),\n",
+    "        seed=42,\n",
+    "        max_tokens=7\n",
+    "    )\n",
+    "    reply = response.choices[0].message.content\n",
+    "    return get_price(reply)\n",
+    "\n",
+    "Tester.test(mod_gpt_fine_tuned_v2, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19febde6",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "For this model in particular, the prompting seems to matter far more than the fine-tuning itself.\n",
+    "We tried training for longer, which led to overfitting. Then we fixed the overfitting, with and without the richer prompt in the training inputs, and the results barely changed."
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}