LLM_Engineering_OLD/week6/community-contributions/kachaje-andelaGenAi-bootcamp/finetune_llama32.ipynb

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Fine-tune Llama 3.2 1B Locally with LoRA\n",
        "\n",
        "This notebook fine-tunes Llama 3.2 1B model for product pricing using Low-Rank Adaptation (LoRA), which is memory-efficient and suitable for local training.\n",
        "\n",
        "**macOS Compatibility:** This notebook uses Hugging Face transformers and PEFT (instead of Unsloth) for better macOS compatibility. Works on CPU, Apple Silicon (Metal), or NVIDIA GPU.\n",
        "\n",
        "**Optimizations:**\n",
        "- LoRA for memory-efficient fine-tuning (only ~1% of parameters trained)\n",
        "- bfloat16 mixed precision training when available\n",
        "- Gradient checkpointing for additional memory savings\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install PyTorch first (required for other packages on macOS ARM64)\n",
        "! uv pip -q install torch torchvision torchaudio\n",
        "\n",
        "# Install required packages for fine-tuning with LoRA (works on macOS without GPU)\n",
        "! uv pip -q install trl peft accelerate datasets transformers"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Imports\n",
        "import os\n",
        "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
        "\n",
        "import re\n",
        "import json\n",
        "import pickle\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments\n",
        "from peft import LoraConfig, get_peft_model, TaskType\n",
        "from datasets import Dataset\n",
        "import torch\n",
        "from items import Item\n",
        "from testing import Tester\n",
        "\n",
        "# Import SFTTrainer - try SFTConfig if available, otherwise use old API\n",
        "try:\n",
        "    from trl import SFTTrainer, SFTConfig\n",
        "    USE_SFT_CONFIG = True\n",
        "except ImportError:\n",
        "    from trl import SFTTrainer\n",
        "    USE_SFT_CONFIG = False\n",
        "    print(\"Note: Using older TRL API without SFTConfig\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load Training Data\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load the training and test datasets\n",
        "with open('train_lite.pkl', 'rb') as f:\n",
        "    train_data = pickle.load(f)\n",
        "\n",
        "with open('test_lite.pkl', 'rb') as f:\n",
        "    test_data = pickle.load(f)\n",
        "\n",
        "print(f\"Training samples: {len(train_data)}\")\n",
        "print(f\"Test samples: {len(test_data)}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Convert Data to Chat Format\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def messages_for(item):\n",
        "    \"\"\"Convert item to chat format for fine-tuning\"\"\"\n",
        "    system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
        "    user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
        "    return [\n",
        "        {\"role\": \"system\", \"content\": system_message},\n",
        "        {\"role\": \"user\", \"content\": user_prompt},\n",
        "        {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
        "    ]\n",
        "\n",
        "# Convert to chat format\n",
        "def format_for_training(items):\n",
        "    texts = []\n",
        "    for item in items:\n",
        "        messages = messages_for(item)\n",
        "        # Format as instruction following format for unsloth\n",
        "        text = f\"### System:\\n{messages[0]['content']}\\n\\n### User:\\n{messages[1]['content']}\\n\\n### Assistant:\\n{messages[2]['content']}\"\n",
        "        texts.append(text)\n",
        "    return texts\n",
        "\n",
        "train_texts = format_for_training(train_data)\n",
        "print(f\"Example training text:\\n{train_texts[0]}\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Create dataset\n",
        "train_dataset = Dataset.from_dict({\"text\": train_texts})\n",
        "print(f\"Dataset created with {len(train_dataset)} samples\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load Model with LoRA Configuration\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load model and tokenizer\n",
        "model_name = \"unsloth/Llama-3.2-1B-Instruct\"\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
        "tokenizer.pad_token = tokenizer.eos_token\n",
        "tokenizer.padding_side = \"right\"\n",
        "\n",
        "# Check if CUDA is available (won't be on macOS without GPU)\n",
        "device_map = \"auto\" if torch.cuda.is_available() else None\n",
        "\n",
        "# Load model (use dtype=bfloat16 for Apple Silicon)\n",
        "model = AutoModelForCausalLM.from_pretrained(\n",
        "    model_name,\n",
        "    dtype=torch.bfloat16 if torch.backends.mps.is_available() else torch.float32,\n",
        "    device_map=device_map,\n",
        ")\n",
        "\n",
        "# Configure LoRA\n",
        "lora_config = LoraConfig(\n",
        "    task_type=TaskType.CAUSAL_LM,\n",
        "    r=16,\n",
        "    lora_alpha=16,\n",
        "    lora_dropout=0.1,\n",
        "    bias=\"none\",\n",
        "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
        "                    \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
        ")\n",
        "\n",
        "# Add LoRA adapters\n",
        "model = get_peft_model(model, lora_config)\n",
        "model.print_trainable_parameters()\n",
        "\n",
        "# Attach tokenizer to model for SFTTrainer\n",
        "model.tokenizer = tokenizer\n",
        "\n",
        "print(\"Model loaded with LoRA adapters\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Configure Training Arguments\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Configure training arguments\n",
        "training_args = TrainingArguments(\n",
        "    output_dir=\"./llama32_pricer_lora\",\n",
        "    per_device_train_batch_size=2,\n",
        "    gradient_accumulation_steps=4,\n",
        "    warmup_steps=10,\n",
        "    max_steps=100,  # Adjust based on dataset size\n",
        "    learning_rate=2e-4,\n",
        "    bf16=torch.backends.mps.is_available() or torch.cuda.is_available(),  # Use bf16 if available\n",
        "    logging_steps=10,\n",
        "    save_strategy=\"steps\",\n",
        "    save_steps=25,\n",
        "    eval_steps=25,\n",
        "    save_total_limit=2,\n",
        "    load_best_model_at_end=False,\n",
        ")\n",
        "\n",
        "print(\"Training arguments configured\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Initialize Trainer and Start Fine-tuning\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Initialize trainer\n",
        "# Model is already wrapped with PEFT (LoRA), so we use basic parameters\n",
        "trainer = SFTTrainer(\n",
        "    model=model,\n",
        "    train_dataset=train_dataset,\n",
        "    args=training_args,\n",
        ")\n",
        "\n",
        "print(\"Trainer initialized\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Train the model\n",
        "trainer.train()\n",
        "print(\"Training completed!\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Save the Fine-tuned Model\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Save the model\n",
        "model.save_pretrained(\"llama32_pricer_lora\")\n",
        "tokenizer.save_pretrained(\"llama32_pricer_lora\")\n",
        "print(\"Model saved to llama32_pricer_lora/\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Test the Fine-tuned Model\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Helper function to extract price from response\n",
        "def get_price(s):\n",
        "    s = s.replace('$','').replace(',','')\n",
        "    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
        "    return float(match.group()) if match else 0\n",
        "\n",
        "# Function to test the fine-tuned model\n",
        "def llama32_finetuned_model(item):\n",
        "    messages = messages_for(item)\n",
        "    \n",
        "    # Format the prompt\n",
        "    prompt = f\"### System:\\n{messages[0]['content']}\\n\\n### User:\\n{messages[1]['content']}\\n\\n### Assistant:\\n\"\n",
        "    \n",
        "    # Move to appropriate device\n",
        "    device = next(model.parameters()).device\n",
        "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
        "    \n",
        "    with torch.no_grad():\n",
        "        outputs = model.generate(\n",
        "            **inputs,\n",
        "            max_new_tokens=50,\n",
        "            temperature=0.1,\n",
        "            do_sample=True,\n",
        "            pad_token_id=tokenizer.eos_token_id\n",
        "        )\n",
        "    \n",
        "    response = tokenizer.decode(outputs[0][inputs[\"input_ids\"].shape[1]:], skip_special_tokens=True)\n",
        "    return get_price(response)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Test on the test dataset\n",
        "print(\"Testing fine-tuned model...\")\n",
        "Tester.test(llama32_finetuned_model, test_data)\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.10"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}