1018 lines
28 KiB
Plaintext
1018 lines
28 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/dkisselev-zz/llm_engineering/blob/wk6-exerc/week6/community-contributions/dkisselev-zz/Week6-Excerise.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "db8736a7-ed94-441c-9556-831fa57b5a10",
|
|
"metadata": {
|
|
"id": "db8736a7-ed94-441c-9556-831fa57b5a10"
|
|
},
|
|
"source": [
|
|
"# The Product Pricer Challenge\n",
|
|
"\n",
|
"Establish a baseline with GPT-4o, then attempt to beat it.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## Initialize and load configuration"
|
|
],
|
|
"metadata": {
|
|
"id": "FwYmkcF_Jw4m"
|
|
},
|
|
"id": "FwYmkcF_Jw4m"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "681c717b-4c24-4ac3-a5f3-3c5881d6e70a",
|
|
"metadata": {
|
|
"id": "681c717b-4c24-4ac3-a5f3-3c5881d6e70a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"\n",
|
|
"import os\n",
|
|
"import re\n",
|
|
"import math\n",
|
|
"import json\n",
|
|
"import random\n",
|
|
"import pickle\n",
|
|
"\n",
|
|
"from collections import Counter\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"from huggingface_hub import login\n",
|
|
"from openai import OpenAI\n",
|
|
"\n",
|
|
"from items import Item\n",
|
|
"from testing import Tester"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "36d05bdc-0155-4c72-a7ee-aa4e614ffd3c",
|
|
"metadata": {
|
|
"id": "36d05bdc-0155-4c72-a7ee-aa4e614ffd3c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# environment\n",
|
|
"\n",
|
"# Load API credentials: Colab secrets when available, otherwise a local .env file.\n",
"try:\n",
"    from google.colab import userdata\n",
"    os.environ['OPENAI_API_KEY']=userdata.get('OPENAI_API_KEY')\n",
"    os.environ['HF_TOKEN']=userdata.get('HF_TOKEN')\n",
"    print(\"✅ Using Colab secrets\")\n",
"except Exception as exc:\n",
"    # `except Exception` instead of a bare `except:` lets KeyboardInterrupt and\n",
"    # SystemExit propagate, and the reason for the fallback is shown, not hidden.\n",
"    print(f\"Colab secrets unavailable ({type(exc).__name__}); falling back to .env\")\n",
"    from dotenv import load_dotenv\n",
"    load_dotenv(override=True)\n",
"    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
"    os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
"    print(\"✅ Using local .env file\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4dd3aad2-6f99-433c-8792-e461d2f06622",
|
|
"metadata": {
|
|
"id": "4dd3aad2-6f99-433c-8792-e461d2f06622"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Log in to HuggingFace\n",
|
|
"\n",
|
|
"hf_token = os.environ['HF_TOKEN']\n",
|
|
"login(hf_token)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b0a6fb86-74a4-403c-ab25-6db2d74e9d2b",
|
|
"metadata": {
|
|
"id": "b0a6fb86-74a4-403c-ab25-6db2d74e9d2b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai = OpenAI()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c830ed3e-24ee-4af6-a07b-a1bfdcd39278",
|
|
"metadata": {
|
|
"id": "c830ed3e-24ee-4af6-a07b-a1bfdcd39278"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5c9b05f4-c9eb-462c-8d86-de9140a2d985",
|
|
"metadata": {
|
|
"id": "5c9b05f4-c9eb-462c-8d86-de9140a2d985"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Let's avoid curating all our data again! Load in the pickle files:\n",
|
|
"\n",
|
|
"with open('train2.pkl', 'rb') as file:\n",
|
|
" train = pickle.load(file)\n",
|
|
"\n",
|
|
"with open('test2.pkl', 'rb') as file:\n",
|
|
" test = pickle.load(file)\n",
|
|
"\n",
|
|
"with open('validation2.pkl','rb') as file:\n",
|
|
" validation = pickle.load(file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e8367135-f40e-43e1-8f3c-09e990ab1194",
|
|
"metadata": {
|
|
"id": "e8367135-f40e-43e1-8f3c-09e990ab1194"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# OpenAI recommends fine-tuning with populations of 50-100 examples\n",
|
|
"# But as our examples are very small, I'm suggesting we go with 500 examples (and 1 epoch)\n",
|
|
"\n",
|
|
"fine_tune_train = train[:500]\n",
|
|
"fine_tune_validation = train[500:550]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"# Weights & Biases integration for the fine-tuning jobs\n",
|
|
"wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer-ft\"}}"
|
|
],
|
|
"metadata": {
|
|
"id": "xvsvrdivOBCs"
|
|
},
|
|
"id": "xvsvrdivOBCs",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## Helpers"
|
|
],
|
|
"metadata": {
|
|
"id": "Fr5cFugENugL"
|
|
},
|
|
"id": "Fr5cFugENugL"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# A utility function to extract the price from a string\n",
|
|
"\n",
|
"def get_price(s):\n",
"    \"\"\"Extract the first number in a model reply and return it as a float.\n",
"\n",
"    Dollar signs and thousands separators are removed before matching, so\n",
"    '$1,299.99' parses as 1299.99. Returns 0 when `s` is None or empty, or when\n",
"    it contains no digits, so a blank completion is scored as a miss instead of\n",
"    raising AttributeError in the middle of an evaluation run.\n",
"    \"\"\"\n",
"    if not s:\n",
"        return 0\n",
"    s = s.replace('$','').replace(',','')\n",
"    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
"    return float(match.group()) if match else 0"
|
|
],
|
|
"metadata": {
|
|
"id": "rRF5PhHANsTN"
|
|
},
|
|
"id": "rRF5PhHANsTN",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Prompt\n",
|
"def messages_for(item):\n",
"    \"\"\"Build the chat messages used to ask a model for an item's price.\n",
"\n",
"    The item's test prompt is reused with the ' to the nearest dollar' wording and\n",
"    the trailing price stub removed. The last message is an assistant turn holding\n",
"    only the 'Price is $' prefix.\n",
"    \"\"\"\n",
"    description = item.test_prompt()\n",
"    for unwanted in (\" to the nearest dollar\", \"\\n\\nPrice is $\"):\n",
"        description = description.replace(unwanted, \"\")\n",
"    instruction = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
"    turns = (\n",
"        (\"system\", instruction),\n",
"        (\"user\", description),\n",
"        (\"assistant\", \"Price is $\"),\n",
"    )\n",
"    return [{\"role\": role, \"content\": content} for role, content in turns]"
|
|
],
|
|
"metadata": {
|
|
"id": "-mDWirZLOTxf"
|
|
},
|
|
"id": "-mDWirZLOTxf",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def messages_with_price(item):\n",
"    \"\"\"Build one fine-tuning example: the pricing prompt plus the item's price.\n",
"\n",
"    The system and user messages are built from the item's test prompt with the\n",
"    ' to the nearest dollar' wording and the trailing price stub removed; the\n",
"    assistant message states the item's price formatted to two decimals.\n",
"    \"\"\"\n",
"    cleaned = item.test_prompt().replace(\" to the nearest dollar\",\"\")\n",
"    cleaned = cleaned.replace(\"\\n\\nPrice is $\",\"\")\n",
"    system_turn = {\n",
"        \"role\": \"system\",\n",
"        \"content\": \"You estimate prices of items. Reply only with the price, no explanation\",\n",
"    }\n",
"    user_turn = {\"role\": \"user\", \"content\": cleaned}\n",
"    answer_turn = {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
"    return [system_turn, user_turn, answer_turn]"
|
|
],
|
|
"metadata": {
|
|
"id": "ttaE6iO9SAZX"
|
|
},
|
|
"id": "ttaE6iO9SAZX",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## Baseline *gpt4o*\n",
|
|
"\n",
|
|
"**Error:** $78.02 RMSLE 0.81 Hits 55.6%"
|
|
],
|
|
"metadata": {
|
|
"id": "N9hXBrSBI2_q"
|
|
},
|
|
"id": "N9hXBrSBI2_q"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "03ff4b48-3788-4370-9e34-6592f23d1bce",
|
|
"metadata": {
|
|
"id": "03ff4b48-3788-4370-9e34-6592f23d1bce"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"def gpt_4o_frontier(item):\n",
"    \"\"\"Price `item` with the gpt-4o-2024-08-06 snapshot and return the parsed number.\n",
"\n",
"    The request uses a fixed seed and caps the reply at 5 tokens; the reply text\n",
"    is converted to a number by get_price.\n",
"    \"\"\"\n",
"    request = {\n",
"        \"model\": \"gpt-4o-2024-08-06\",\n",
"        \"messages\": messages_for(item),\n",
"        \"seed\": 42,\n",
"        \"max_tokens\": 5,\n",
"    }\n",
"    completion = openai.chat.completions.create(**request)\n",
"    first_choice = completion.choices[0]\n",
"    return get_price(first_choice.message.content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"Tester.test(gpt_4o_frontier, test)"
|
|
],
|
|
"metadata": {
|
|
"id": "ymJRsQKRJAhS"
|
|
},
|
|
"id": "ymJRsQKRJAhS",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
"## Fine-tuned baseline *gpt-4o-mini*\n",
|
|
"\n",
|
|
"**Error:** $105.37 RMSLE 0.84 Hits 41.2%"
|
|
],
|
|
"metadata": {
|
|
"id": "RZAsscjePPg4"
|
|
},
|
|
"id": "RZAsscjePPg4"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8be4a889-81c3-42b1-a2fc-034cdc7321a6",
|
|
"metadata": {
|
|
"id": "8be4a889-81c3-42b1-a2fc-034cdc7321a6"
|
|
},
|
|
"source": [
|
|
"### Data Preprocessing\n",
|
|
"\n",
|
|
"Prepare our data for fine-tuning in JSONL (JSON Lines) format and upload to OpenAI"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c0e5b56c-8a0b-4d8e-a112-ce87efb4e152",
|
|
"metadata": {
|
|
"id": "c0e5b56c-8a0b-4d8e-a112-ce87efb4e152"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Convert the items into a list of json objects - a \"jsonl\" string\n",
|
|
"# Each row represents a message in the form:\n",
|
|
"# {\"messages\" : [{\"role\": \"system\", \"content\": \"You estimate prices...\n",
|
|
"\n",
|
"def make_jsonl(items, messages_fn=None):\n",
"    \"\"\"Serialise items as JSONL text for chat fine-tuning.\n",
"\n",
"    Each output line is one JSON object of the form {\"messages\": [...]}.\n",
"\n",
"    Args:\n",
"        items: iterable of items to convert.\n",
"        messages_fn: callable mapping an item to its list of chat messages.\n",
"            Defaults to messages_with_price, so one-argument calls behave as\n",
"            before; pass another builder to reuse this function with a\n",
"            different prompt format.\n",
"\n",
"    Returns:\n",
"        The lines joined by newlines with no trailing newline (\"\" for no items).\n",
"    \"\"\"\n",
"    if messages_fn is None:\n",
"        messages_fn = messages_with_price\n",
"    # One join instead of repeated string concatenation inside the loop.\n",
"    return \"\\n\".join(json.dumps({\"messages\": messages_fn(item)}) for item in items)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7734bff0-95c4-4e67-a87e-7e2254e2c67d",
|
|
"metadata": {
|
|
"id": "7734bff0-95c4-4e67-a87e-7e2254e2c67d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Convert the items into jsonl and write them to a file\n",
|
|
"\n",
|
"def write_jsonl(items, filename):\n",
"    \"\"\"Write `items` to `filename` in the JSONL format produced by make_jsonl.\n",
"\n",
"    The text is built before the file is opened, so a failure while converting\n",
"    an item cannot leave a truncated or empty file behind. The file is written\n",
"    as UTF-8 regardless of the platform's default encoding.\n",
"    \"\"\"\n",
"    jsonl = make_jsonl(items)\n",
"    with open(filename, \"w\", encoding=\"utf-8\") as f:\n",
"        f.write(jsonl)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "393d3ad8-999a-4f99-8c04-339d9166d604",
|
|
"metadata": {
|
|
"id": "393d3ad8-999a-4f99-8c04-339d9166d604"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"write_jsonl(fine_tune_train, \"fine_tune_train.jsonl\")\n",
|
|
"write_jsonl(fine_tune_validation, \"fine_tune_validation.jsonl\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d59ad8d2-c61a-448e-b7ed-232f1606970f",
|
|
"metadata": {
|
|
"id": "d59ad8d2-c61a-448e-b7ed-232f1606970f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(\"fine_tune_train.jsonl\", \"rb\") as f:\n",
|
|
" train_file = openai.files.create(file=f, purpose=\"fine-tune\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "083fefba-fd54-47ce-9ff3-aabbc200846f",
|
|
"metadata": {
|
|
"id": "083fefba-fd54-47ce-9ff3-aabbc200846f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_file"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "97df3360-0760-4422-a556-5f26d23de6dc",
|
|
"metadata": {
|
|
"id": "97df3360-0760-4422-a556-5f26d23de6dc"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"with open(\"fine_tune_validation.jsonl\", \"rb\") as f:\n",
|
|
" validation_file = openai.files.create(file=f, purpose=\"fine-tune\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1abb8f3-9e52-4061-970c-fcf399d8ffa3",
|
|
"metadata": {
|
|
"id": "a1abb8f3-9e52-4061-970c-fcf399d8ffa3"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"validation_file"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"### Fine Tune the model"
|
|
],
|
|
"metadata": {
|
|
"id": "MXXCe72aKdfR"
|
|
},
|
|
"id": "MXXCe72aKdfR"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "45421b86-5531-4e42-ab19-d6abbb8f4c13",
|
|
"metadata": {
|
|
"id": "45421b86-5531-4e42-ab19-d6abbb8f4c13"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai.fine_tuning.jobs.create(\n",
|
|
" training_file=train_file.id,\n",
|
|
" validation_file=validation_file.id,\n",
|
|
" model=\"gpt-4o-mini-2024-07-18\",\n",
|
|
" seed=42,\n",
|
|
" hyperparameters={\"n_epochs\": 1},\n",
|
|
" integrations = [wandb_integration],\n",
|
|
" suffix=\"pricer\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "aeb9de2e-542c-4e83-81c7-b6745133e48b",
|
|
"metadata": {
|
|
"id": "aeb9de2e-542c-4e83-81c7-b6745133e48b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai.fine_tuning.jobs.list(limit=1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "40d24873-8ff5-413f-b0d4-8f77c28f18e1",
|
|
"metadata": {
|
|
"id": "40d24873-8ff5-413f-b0d4-8f77c28f18e1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a32aef35-4b38-436c-ad00-d082f758efa7",
|
|
"metadata": {
|
|
"id": "a32aef35-4b38-436c-ad00-d082f758efa7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"job_id"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a7e01247-c133-48e1-93d3-c79c399e6178",
|
|
"metadata": {
|
|
"id": "a7e01247-c133-48e1-93d3-c79c399e6178"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai.fine_tuning.jobs.retrieve(job_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0f5150e1-b8de-485f-8eba-cf1e5b00c117",
|
|
"metadata": {
|
|
"id": "0f5150e1-b8de-485f-8eba-cf1e5b00c117"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "066fef03-8338-4526-9df3-89b649ad4f0a",
|
|
"metadata": {
|
|
"id": "066fef03-8338-4526-9df3-89b649ad4f0a"
|
|
},
|
|
"source": [
|
|
"### Run inference on the fine tune model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fa4488cb-3c17-4eda-abd1-53c1c68a491b",
|
|
"metadata": {
|
|
"id": "fa4488cb-3c17-4eda-abd1-53c1c68a491b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4ff92d61-0d27-4b0d-8b32-c9891016509b",
|
|
"metadata": {
|
|
"id": "4ff92d61-0d27-4b0d-8b32-c9891016509b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Try this out\n",
|
|
"\n",
|
|
"messages_for(test[237])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "501a2a7a-69c8-451b-bbc0-398bcb9e1612",
|
|
"metadata": {
|
|
"id": "501a2a7a-69c8-451b-bbc0-398bcb9e1612"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Inference function for the fine-tuned gpt-4o-mini model\n",
|
|
"\n",
|
"def gpt_fine_tuned(item):\n",
"    \"\"\"Price `item` with the fine-tuned model and return the parsed number.\n",
"\n",
"    The model id is read from the global `fine_tuned_model_name` at call time, so\n",
"    the result depends on which fine-tuning job that variable currently refers to.\n",
"    \"\"\"\n",
"    prompt = messages_for(item)\n",
"    completion = openai.chat.completions.create(\n",
"        model=fine_tuned_model_name, messages=prompt, seed=42, max_tokens=7\n",
"    )\n",
"    answer_text = completion.choices[0].message.content\n",
"    return get_price(answer_text)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "843d88b4-364a-431b-b48b-8a7c1f68b786",
|
|
"metadata": {
|
|
"id": "843d88b4-364a-431b-b48b-8a7c1f68b786"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(test[237].price)\n",
|
|
"print(gpt_fine_tuned(test[237]))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "36bdd2c9-1859-4f99-a09f-3ec83b845b30",
|
|
"metadata": {
|
|
"id": "36bdd2c9-1859-4f99-a09f-3ec83b845b30"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"Tester.test(gpt_fine_tuned, test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## gpt 4.1 base\n",
|
|
"**Error:** $70.36 RMSLE=0.522 Hits=64.4%"
|
|
],
|
|
"metadata": {
|
|
"id": "EF9S1_dBEVAc"
|
|
},
|
|
"id": "EF9S1_dBEVAc"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def gpt_4_1_frontier(item):\n",
"    \"\"\"Price `item` with the untuned gpt-4.1-2025-04-14 snapshot.\n",
"\n",
"    Sends the messages_for prompt with a fixed seed and a 7-token completion cap,\n",
"    then parses the reply text with get_price.\n",
"    \"\"\"\n",
"    create_completion = openai.chat.completions.create\n",
"    completion = create_completion(\n",
"        messages=messages_for(item),\n",
"        model=\"gpt-4.1-2025-04-14\",\n",
"        max_completion_tokens=7,\n",
"        seed=42,\n",
"    )\n",
"    return get_price(completion.choices[0].message.content)"
|
|
],
|
|
"metadata": {
|
|
"id": "QRIelvwmEZgw"
|
|
},
|
|
"id": "QRIelvwmEZgw",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"Tester.test(gpt_4_1_frontier, test)"
|
|
],
|
|
"metadata": {
|
|
"id": "2vwrkA_4Eon6"
|
|
},
|
|
"id": "2vwrkA_4Eon6",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
"### gpt 4.1 hypertuned, extended dataset\n",
"**Error:** $67.93 RMSLE=0.47 Hits=68.8%"
|
|
],
|
|
"metadata": {
|
|
"id": "L0-cps4dLg0S"
|
|
},
|
|
"id": "L0-cps4dLg0S"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def gpt_4_1_hypertuned(item):\n",
"    \"\"\"Price `item` with gpt-4.1-2025-04-14 using the messages_v2 prompt.\n",
"\n",
"    Uses a fixed seed, temperature 0.2 and a 7-token completion cap.\n",
"\n",
"    NOTE(review): messages_v2 is defined in a later cell of this notebook, so on a\n",
"    fresh kernel that cell must be run before this function is called.\n",
"    \"\"\"\n",
"    prompt = messages_v2(item, with_price=False)\n",
"    settings = {\"seed\": 42, \"temperature\": 0.2, \"max_completion_tokens\": 7}\n",
"    completion = openai.chat.completions.create(\n",
"        model=\"gpt-4.1-2025-04-14\",\n",
"        messages=prompt,\n",
"        **settings,\n",
"    )\n",
"    answer_text = completion.choices[0].message.content\n",
"    return get_price(answer_text)"
|
|
],
|
|
"metadata": {
|
|
"id": "ZptbHZN3LilR"
|
|
},
|
|
"id": "ZptbHZN3LilR",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"Tester.test(gpt_4_1_hypertuned, test)"
|
|
],
|
|
"metadata": {
|
|
"id": "CtBfsCixLgSe"
|
|
},
|
|
"id": "CtBfsCixLgSe",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## gpt-5 base\n",
|
"**Error:** $77.97 RMSLE=0.53 Hits=61.6% (reasoning_effort=\"minimal\")\n",
"\n",
"**Error:** $70.63 RMSLE=0.51 Hits=61.6% (reasoning_effort=\"low\")"
|
|
],
|
|
"metadata": {
|
|
"id": "LQiDcxk3pNc4"
|
|
},
|
|
"id": "LQiDcxk3pNc4"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def gpt_5_frontier(item):\n",
"    \"\"\"Price `item` with the gpt-5-2025-08-07 snapshot at low reasoning effort.\n",
"\n",
"    The completion budget is 800 tokens; the reply text is parsed by get_price.\n",
"    \"\"\"\n",
"    request = dict(\n",
"        model=\"gpt-5-2025-08-07\",\n",
"        messages=messages_for(item),\n",
"        seed=42,\n",
"        reasoning_effort=\"low\",\n",
"        max_completion_tokens=800,\n",
"    )\n",
"    completion = openai.chat.completions.create(**request)\n",
"    top_choice = completion.choices[0]\n",
"    return get_price(top_choice.message.content)"
|
|
],
|
|
"metadata": {
|
|
"id": "nZk45Bujp4aS"
|
|
},
|
|
"id": "nZk45Bujp4aS",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"Tester.test(gpt_5_frontier, test)"
|
|
],
|
|
"metadata": {
|
|
"id": "9wx-0BT_p-j_"
|
|
},
|
|
"id": "9wx-0BT_p-j_",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"## gpt 4.1 fine-tuned with extended dataset"
|
|
],
|
|
"metadata": {
|
|
"id": "mOXvulp11NRS"
|
|
},
|
|
"id": "mOXvulp11NRS"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"### Data Preprocessing"
|
|
],
|
|
"metadata": {
|
|
"id": "hY0v7oBR1brc"
|
|
},
|
|
"id": "hY0v7oBR1brc"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"\n",
|
"def messages_v2(item, with_price=True):\n",
"    \"\"\"Build the structured chat prompt (system rules plus a dict-style user payload).\n",
"\n",
"    Args:\n",
"        item: object exposing test_prompt(), category, details and price.\n",
"        with_price: if True the assistant message carries the item's price to two\n",
"            decimals; if False it is only the 'Price is $' prefix.\n",
"\n",
"    Returns:\n",
"        A list of system, user and assistant message dicts. The user content is\n",
"        str() of a Python dict (not JSON); that formatting is kept unchanged.\n",
"    \"\"\"\n",
"    # NOTE(review): the system text asks for a bare number with no $ sign, while the\n",
"    # assistant message starts with 'Price is $' - confirm which format is intended.\n",
"    system_message = (\n",
"        \"Role: You are a retail price estimator.\\n\"\n",
"        \"Market: United States; Currency: USD.\\n\"\n",
"        \"Scope: Predict the most likely new retail price. Ignore taxes, shipping, coupons, bundles, used/renewed.\\n\"\n",
"        \"Output: Only a number with two decimals (e.g., 129.99). No $ sign. No words.\\n\"\n",
"        \"Think silently; do not reveal reasoning.\"\n",
"    )\n",
"\n",
"    user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
"\n",
"    # One item whose details are None or not valid JSON should not abort a whole\n",
"    # JSONL build or test run; such items get the \"Unknown\" brand.\n",
"    try:\n",
"        details = json.loads(item.details)\n",
"    except (TypeError, ValueError):\n",
"        details = {}\n",
"    brand = details.get(\"Brand\",\"Unknown\") if isinstance(details, dict) else \"Unknown\"\n",
"\n",
"    return [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": str({\n",
"            \"query\":\"price_estimate\",\n",
"            \"locale\":\"en_US\",\n",
"            \"currency\":\"USD\",\n",
"            \"category\":item.category,\n",
"            \"description\":user_prompt,\n",
"            \"brand\":brand\n",
"        })},\n",
"        {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\" if with_price else \"Price is $\"}\n",
"    ]"
|
|
],
|
|
"metadata": {
|
|
"id": "dAqEb7GD2HJZ"
|
|
},
|
|
"id": "dAqEb7GD2HJZ",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"messages_v2(test[237], with_price=False)\n"
|
|
],
|
|
"metadata": {
|
|
"id": "wRqFRHzE_LPm"
|
|
},
|
|
"id": "wRqFRHzE_LPm",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def make_jsonl(items, messages_fn=None):\n",
"    \"\"\"Serialise items as JSONL text using the messages_v2 prompt by default.\n",
"\n",
"    This definition replaces the earlier make_jsonl in the notebook namespace.\n",
"    Each output line is one JSON object of the form {\"messages\": [...]}.\n",
"\n",
"    Args:\n",
"        items: iterable of items to convert.\n",
"        messages_fn: callable mapping an item to its list of chat messages.\n",
"            Defaults to messages_v2 (called with its default with_price=True), so\n",
"            one-argument calls behave as before.\n",
"\n",
"    Returns:\n",
"        The lines joined by newlines with no trailing newline (\"\" for no items).\n",
"    \"\"\"\n",
"    if messages_fn is None:\n",
"        messages_fn = messages_v2\n",
"    # One join instead of repeated string concatenation inside the loop.\n",
"    return \"\\n\".join(json.dumps({\"messages\": messages_fn(item)}) for item in items)"
|
|
],
|
|
"metadata": {
|
|
"id": "CIkBy83R1T_J"
|
|
},
|
|
"id": "CIkBy83R1T_J",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Convert the items into jsonl and write them to a file\n",
|
|
"\n",
|
"def write_jsonl(items, filename):\n",
"    \"\"\"Write `items` to `filename` in the JSONL format produced by make_jsonl.\n",
"\n",
"    The text is built before the file is opened, so a failure while converting\n",
"    an item cannot leave a truncated or empty file behind. The file is written\n",
"    as UTF-8 regardless of the platform's default encoding.\n",
"    \"\"\"\n",
"    jsonl = make_jsonl(items)\n",
"    with open(filename, \"w\", encoding=\"utf-8\") as f:\n",
"        f.write(jsonl)"
|
|
],
|
|
"metadata": {
|
|
"id": "WBkFmTvb1hwI"
|
|
},
|
|
"id": "WBkFmTvb1hwI",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"write_jsonl(fine_tune_train, \"fine_tune_train_v2.jsonl\")\n",
|
|
"write_jsonl(fine_tune_validation, \"fine_tune_validation_v2.jsonl\")"
|
|
],
|
|
"metadata": {
|
|
"id": "7YbhOEZA1lhm"
|
|
},
|
|
"id": "7YbhOEZA1lhm",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"with open(\"fine_tune_train_v2.jsonl\", \"rb\") as f:\n",
|
|
" train_file_v2 = openai.files.create(file=f, purpose=\"fine-tune\")\n",
|
|
"\n",
|
|
"with open(\"fine_tune_validation_v2.jsonl\", \"rb\") as f:\n",
|
|
" validation_file_v2 = openai.files.create(file=f, purpose=\"fine-tune\")"
|
|
],
|
|
"metadata": {
|
|
"id": "n62FQj701ntK"
|
|
},
|
|
"id": "n62FQj701ntK",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"### Fine tune the model"
|
|
],
|
|
"metadata": {
|
|
"id": "CvqTsT3w547n"
|
|
},
|
|
"id": "CvqTsT3w547n"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"openai.fine_tuning.jobs.create(\n",
|
|
" training_file=train_file_v2.id,\n",
|
|
" validation_file=validation_file_v2.id,\n",
|
|
" model=\"gpt-4.1-2025-04-14\",\n",
|
|
" seed=42,\n",
|
|
" hyperparameters={\"n_epochs\": 1},\n",
|
|
" integrations = [wandb_integration],\n",
|
|
" suffix=\"pricer\"\n",
|
|
")"
|
|
],
|
|
"metadata": {
|
|
"id": "V4hVbBhi58_k"
|
|
},
|
|
"id": "V4hVbBhi58_k",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"openai.fine_tuning.jobs.list(limit=1)"
|
|
],
|
|
"metadata": {
|
|
"id": "QdUK7rYd6X7J"
|
|
},
|
|
"id": "QdUK7rYd6X7J",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id"
|
|
],
|
|
"metadata": {
|
|
"id": "lpOvwnI36Y7m"
|
|
},
|
|
"id": "lpOvwnI36Y7m",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"openai.fine_tuning.jobs.retrieve(job_id)"
|
|
],
|
|
"metadata": {
|
|
"id": "YYL0Thpw6ZoU"
|
|
},
|
|
"id": "YYL0Thpw6ZoU",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data"
|
|
],
|
|
"metadata": {
|
|
"id": "ZjYZl4eo6jDL"
|
|
},
|
|
"id": "ZjYZl4eo6jDL",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"### Inference"
|
|
],
|
|
"metadata": {
|
|
"id": "ZuGvgAwX6p5N"
|
|
},
|
|
"id": "ZuGvgAwX6p5N"
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model"
|
|
],
|
|
"metadata": {
|
|
"id": "YiPB6tOx6je6"
|
|
},
|
|
"id": "YiPB6tOx6je6",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"def gpt_41fine_tuned(item):\n",
"    \"\"\"Price `item` with the fine-tuned model using the messages_v2 prompt.\n",
"\n",
"    The model id is read from the global `fine_tuned_model_name` at call time. The\n",
"    request uses a fixed seed, temperature 1.0 and a 7-token reply cap; the reply\n",
"    text is parsed by get_price.\n",
"    \"\"\"\n",
"    prompt = messages_v2(item, with_price=False)\n",
"    sampling = {\"seed\": 42, \"temperature\": 1.0, \"max_tokens\": 7}\n",
"    completion = openai.chat.completions.create(\n",
"        model=fine_tuned_model_name,\n",
"        messages=prompt,\n",
"        **sampling,\n",
"    )\n",
"    answer_text = completion.choices[0].message.content\n",
"    return get_price(answer_text)"
|
|
],
|
|
"metadata": {
|
|
"id": "NQy00Zx065yT"
|
|
},
|
|
"id": "NQy00Zx065yT",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"Tester.test(gpt_41fine_tuned, test)"
|
|
],
|
|
"metadata": {
|
|
"id": "bUVakvwgUa0Y"
|
|
},
|
|
"id": "bUVakvwgUa0Y",
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"###"
|
|
],
|
|
"metadata": {
|
|
"id": "ZxQokpS95n-5"
|
|
},
|
|
"id": "ZxQokpS95n-5"
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.11"
|
|
},
|
|
"colab": {
|
|
"provenance": [],
|
|
"include_colab_link": true
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |