{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Fine-tune Llama 3.2 1B Locally with LoRA\n", "\n", "This notebook fine-tunes Llama 3.2 1B model for product pricing using Low-Rank Adaptation (LoRA), which is memory-efficient and suitable for local training.\n", "\n", "**macOS Compatibility:** This notebook uses Hugging Face transformers and PEFT (instead of Unsloth) for better macOS compatibility. Works on CPU, Apple Silicon (Metal), or NVIDIA GPU.\n", "\n", "**Optimizations:**\n", "- LoRA for memory-efficient fine-tuning (only ~1% of parameters trained)\n", "- bfloat16 mixed precision training when available\n", "- Gradient checkpointing for additional memory savings\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Install PyTorch first (required for other packages on macOS ARM64)\n", "! uv pip -q install torch torchvision torchaudio\n", "\n", "# Install required packages for fine-tuning with LoRA (works on macOS without GPU)\n", "! uv pip -q install trl peft accelerate datasets transformers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Imports\n", "import os\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "\n", "import re\n", "import json\n", "import pickle\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments\n", "from peft import LoraConfig, get_peft_model, TaskType\n", "from datasets import Dataset\n", "import torch\n", "from items import Item\n", "from testing import Tester\n", "\n", "# Import SFTTrainer - try SFTConfig if available, otherwise use old API\n", "try:\n", " from trl import SFTTrainer, SFTConfig\n", " USE_SFT_CONFIG = True\n", "except ImportError:\n", " from trl import SFTTrainer\n", " USE_SFT_CONFIG = False\n", " print(\"Note: Using older TRL API without SFTConfig\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Training Data\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the training and test datasets\n", "with open('train_lite.pkl', 'rb') as f:\n", " train_data = pickle.load(f)\n", "\n", "with open('test_lite.pkl', 'rb') as f:\n", " test_data = pickle.load(f)\n", "\n", "print(f\"Training samples: {len(train_data)}\")\n", "print(f\"Test samples: {len(test_data)}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert Data to Chat Format\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def messages_for(item):\n", " \"\"\"Convert item to chat format for fine-tuning\"\"\"\n", " system_message = \"You estimate prices of items. 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Convert Data to Chat Format\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def messages_for(item):\n", "    \"\"\"Convert an item to chat format for fine-tuning\"\"\"\n", "    system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n", "    user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n", "    return [\n", "        {\"role\": \"system\", \"content\": system_message},\n", "        {\"role\": \"user\", \"content\": user_prompt},\n", "        {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n", "    ]\n", "\n", "# Convert to plain-text training examples\n", "def format_for_training(items):\n", "    texts = []\n", "    for item in items:\n", "        messages = messages_for(item)\n", "        # Format with a simple instruction-following template\n", "        text = f\"### System:\\n{messages[0]['content']}\\n\\n### User:\\n{messages[1]['content']}\\n\\n### Assistant:\\n{messages[2]['content']}\"\n", "        texts.append(text)\n", "    return texts\n", "\n", "train_texts = format_for_training(train_data)\n", "print(f\"Example training text:\\n{train_texts[0]}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create dataset\n", "train_dataset = Dataset.from_dict({\"text\": train_texts})\n", "print(f\"Dataset created with {len(train_dataset)} samples\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Model with LoRA Configuration\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load model and tokenizer (community mirror of meta-llama/Llama-3.2-1B-Instruct)\n", "model_name = \"unsloth/Llama-3.2-1B-Instruct\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\"\n", "\n", "# Check if CUDA is available (it won't be on macOS)\n", "device_map = \"auto\" if torch.cuda.is_available() else None\n", "\n", "# Use bfloat16 when the hardware supports it (Apple Silicon, or Ampere+ NVIDIA GPUs)\n", "use_bf16 = torch.backends.mps.is_available() or (torch.cuda.is_available() and torch.cuda.is_bf16_supported())\n", "\n", "# torch_dtype works across transformers versions (newer releases also accept dtype)\n", "model = AutoModelForCausalLM.from_pretrained(\n", "    model_name,\n", "    torch_dtype=torch.bfloat16 if use_bf16 else torch.float32,\n", "    device_map=device_map,\n", ")\n", "\n", "# Configure LoRA\n", "lora_config = LoraConfig(\n", "    task_type=TaskType.CAUSAL_LM,\n", "    r=16,\n", "    lora_alpha=16,\n", "    lora_dropout=0.1,\n", "    bias=\"none\",\n", "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", "                    \"gate_proj\", \"up_proj\", \"down_proj\"],\n", ")\n", "\n", "# Add LoRA adapters\n", "model = get_peft_model(model, lora_config)\n", "model.print_trainable_parameters()\n", "\n", "# Make gradient checkpointing work with frozen base weights\n", "model.enable_input_require_grads()\n", "\n", "print(\"Model loaded with LoRA adapters\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Configure Training Arguments\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Configure training arguments\n", "training_args = TrainingArguments(\n", "    output_dir=\"./llama32_pricer_lora\",\n", "    per_device_train_batch_size=2,\n", "    gradient_accumulation_steps=4,\n", "    warmup_steps=10,\n", "    max_steps=100,  # Adjust based on dataset size\n", "    learning_rate=2e-4,\n", "    bf16=use_bf16,  # bf16 mixed precision when available\n", "    gradient_checkpointing=True,  # Trade compute for memory\n", "    gradient_checkpointing_kwargs={\"use_reentrant\": False},\n", "    logging_steps=10,\n", "    save_strategy=\"steps\",\n", "    save_steps=25,\n", "    save_total_limit=2,\n", "    load_best_model_at_end=False,  # No eval dataset, so nothing to select on\n", ")\n", "\n", "print(\"Training arguments configured\")\n" ] },
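{ "cell_type": "markdown", "metadata": {}, "source": [ "### Check Tokenized Sequence Lengths\n", "\n", "Before training, it is worth confirming the formatted texts fit in a modest context window. The sketch below tokenizes a sample of `train_texts` with the `tokenizer` loaded above; the sample size of 100 is an arbitrary choice.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Tokenize a sample of the training texts to gauge sequence lengths\n", "sample_lengths = [len(tokenizer.encode(t)) for t in train_texts[:100]]\n", "print(f\"Max tokens in first {len(sample_lengths)} samples: {max(sample_lengths)}\")\n", "print(f\"Mean tokens: {sum(sample_lengths) / len(sample_lengths):.0f}\")\n" ] },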
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Initialize Trainer and Start Fine-tuning\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Initialize trainer\n", "# Model is already wrapped with PEFT (LoRA), so we pass it in directly.\n", "# Newer TRL takes the tokenizer via processing_class; older versions via tokenizer.\n", "try:\n", "    trainer = SFTTrainer(model=model, train_dataset=train_dataset, args=training_args, processing_class=tokenizer)\n", "except TypeError:\n", "    trainer = SFTTrainer(model=model, train_dataset=train_dataset, args=training_args, tokenizer=tokenizer, dataset_text_field=\"text\")\n", "\n", "print(\"Trainer initialized\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Train the model\n", "trainer.train()\n", "print(\"Training completed!\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save the Fine-tuned Model\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save the LoRA adapter weights (a few MB) and the tokenizer; the base model is not copied\n", "model.save_pretrained(\"llama32_pricer_lora\")\n", "tokenizer.save_pretrained(\"llama32_pricer_lora\")\n", "print(\"Model saved to llama32_pricer_lora/\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test the Fine-tuned Model\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Helper function to extract a price from the model's response\n", "def get_price(s):\n", "    s = s.replace('$','').replace(',','')\n", "    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n", "    return float(match.group()) if match else 0\n", "\n", "# Function to test the fine-tuned model\n", "def llama32_finetuned_model(item):\n", "    messages = messages_for(item)\n", "\n", "    # Format the prompt with the same template used in training, stopping at the assistant turn\n", "    prompt = f\"### System:\\n{messages[0]['content']}\\n\\n### User:\\n{messages[1]['content']}\\n\\n### Assistant:\\n\"\n", "\n", "    # Move inputs to the model's device\n", "    device = next(model.parameters()).device\n", "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n", "\n", "    model.eval()  # Disable dropout for inference\n", "    with torch.no_grad():\n", "        outputs = model.generate(\n", "            **inputs,\n", "            max_new_tokens=50,\n", "            temperature=0.1,\n", "            do_sample=True,\n", "            pad_token_id=tokenizer.eos_token_id\n", "        )\n", "\n", "    # Decode only the newly generated tokens\n", "    response = tokenizer.decode(outputs[0][inputs[\"input_ids\"].shape[1]:], skip_special_tokens=True)\n", "    return get_price(response)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Test on the test dataset\n", "print(\"Testing fine-tuned model...\")\n", "Tester.test(llama32_finetuned_model, test_data)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.10" } }, "nbformat": 4, "nbformat_minor": 2 }