diff --git a/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb b/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb new file mode 100644 index 0000000..6a10dd6 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "4WDyBU0Vm0Zl" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 5)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- ➡️ Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🦙 Part 5: Evaluating LLaMA 3.1 8B Quantized\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab\n", + "- 🛠️ Requirements: 🔑 HF Token\n", + "- Tasks:\n", + " - Quantize LLaMA 3.1 8B to 4-bit\n", + " - Define prediction function\n", + " - Evaluate with Tester\n", + "\n", + "We know LLaMA 3.1 won’t beat frontier models — but how far behind is it without any tuning?\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MDyR63OTNUJ6", + "outputId": "7e9e5b6b-d11c-45df-d774-2da5f6455d51" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets torch transformers bitsandbytes accelerate matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import re\n", + "import math\n", + "import torch\n", +
# Google Colab User Data
# Ensure you have set the following in your Google Colab environment:
# The Hugging Face token is read from Colab's secret store under the key 'HF_TOKEN'.
hf_token = userdata.get('HF_TOKEN')

# Constants

# Hub id of the base model that this notebook loads and evaluates.
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
# Hub account that owns the dataset repo; DATASET_NAME is derived from it.
HF_USER = "lisekarimi"
DATASET_NAME = f"{HF_USER}/pricer-data"

# Authenticate with the Hub before anything is downloaded.
# NOTE(review): presumably required because the base model repo is gated — confirm.
login(hf_token, add_to_git_credential=True)

# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:
# %pip install -U datasets

# Download the dataset and keep handles on its 'train' and 'test' splits.
# Only `test` is used by the evaluation cells visible in this notebook.
dataset = load_dataset(DATASET_NAME)
train = dataset['train']
test = dataset['test']
+ "outputs": [], + "source": [ + "test[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vrxH6h00P9qc" + }, + "source": [ + "## πŸ¦™ Load Tokenizer and Quantized LLaMA Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 418, + "referenced_widgets": [ + "8698049df4ce440d8a16bc34d69231e7", + "b0f74f9ed33649ebb16952d0fb0aecab", + "00241684d4f64383b032a1362d174d55", + "b74ea8552d8145d28f43cf7ad8450d0b", + "6838953f363945d79e079c12aeb2232b", + "57c4a682571e401f9fec996603fa587d", + "e2b93774cb4a4648a4210c7693864ccc", + "766c8e2406ce407faf3489229dada75a", + "a52ea6c6672b4ab2bae4f669dc45deb6", + "381094cbfef64531a58df85e6d453847", + "a6a14894a06a4d4b839f3b791cfadd34", + "b43b118cae174c3c810f08c2249b80dc", + "6502e59cbd344bf6be966923bb3e38fa", + "f86fcf5f6bf24ce399c5d64dc83c3690", + "46598b397fbc4461bb83ad45000e5569", + "dd307f2b56824c6c91b8fe1c028a1704", + "b87577323cb44a2f9f3cc0a8219eadf4", + "0ed52815268a4d90a224029e9e23e09f", + "fafd3011a3cb4e0099a0db77caf843aa", + "89bb151875e349118677c9677bec4137", + "b8722d98e81d4e3494c9c0b8b01319d4", + "b8f661dd658e45a49b5bac2bdf7f3b78", + "f031fadfbba14031a944c351fd99b032", + "a6bf09a98289481abb0c9882702eb575", + "09d54c271e55463495cb9b617d2ec41d", + "32bdcc7f7efd47679258f398184dd0ab", + "0f090959768e40aa8fabfacdf772d8df", + "e40a9c85694c446d84d3493274138178", + "ceb1bbb613f0438aa6996d5551f713f3", + "59b59d4040e04b65a66f578160d13d43", + "079ab8a1fae64a0782ea8035f494f2fd", + "1e425099c1044c14986386e5a4ce0b48", + "1e3e028ec53448c691abb2cfad4ffd8c", + "d2142e9de5ea4dd2b8d00b56354352c3", + "cf1d7a58189b4a5bbe6d005de998548c", + "5b6e10b9e5a146be85e519c1bc476fcd", + "6d9feded70b84654ab79f9d13b065c83", + "ec5afa847d364fbc974b35d821ccd931", + "fa392b34ae8647668e94aca22c1a5edb", + "4d315c92d7c84191a165218d403c0f8d", + "c4279579368841b99a46f529c55125e9", + "572bb151402e4940a8d7c92156f9711b", + 
"d4536337378b4146b054371b18f83fdc", + "14ddae4a15d74c80b1712443853e3f96", + "46e827e5d7a94a619536ff08127b6172", + "ad7977cc642e4cae890e52d03d753788", + "73ba16b54d314d94aaeebea2ba291a94", + "1bc2120a87ab4ad99798b1706342bd89", + "4c4772ace8c246b9a5c8f870ed27c11b", + "d179e366c02f4bb2897cc9f531955e5e", + "1134fd00384740d0a39b6de241ca17cd", + "5a120718aa934959bd50cf4864b137b4", + "46917a8997f942fea0aaf00a95459f93", + "6076b184b66c4d50a91bc477c8eea53e", + "2538bc7fc4594363934266f25bcd52bc", + "f6144dfa2a20416b9e5c28615a5ff129", + "35941d364234488da6fcc0997a5cccf7", + "8c5e160cc4434ca99f694f5e195a2005", + "fa82a4c6e8fc4591aa5652d7d95c6e40", + "2a82034aebcb4e3fbaff825ca59817d0", + "edcabc56841a4ba68ee53385fe2dc0f6", + "e8bf66aa640e433d8d890ae541b21dcd", + "eb7b76b25ce44dc1b8eba7cac8bc9671", + "c9648aceda71470284f6ed7ce2add462", + "7df6ddb46ae3419fbc3fac488eb8a6b6", + "3b2aa7ca49e4451fbfc65560a2d3d43d", + "6af2831aa4e641568d72df6d13fc074a", + "732cb67ef916489298655df845773934", + "cc96573e39e148dbac8b0bd299f0f0b0", + "4e9cbcdc1cfb495a850be45cf752d3c4", + "b811921bdbe84b0dbd9add0f69271ef0", + "a1053fdea18348119949b326f3a12651", + "90c10be928c54821aacf11705c0513ff", + "a543366ec93c486bb2d28d1ff9567197", + "97ff48d2660444a1a7503e735e2b2a55", + "1fd84a85c98246adb2e18e41c8a9d88f", + "fed4a63b10ea4788af8cd181d8d24863", + "8bd5f65dda734db1a253897f85428d4c", + "2744cec152a44fd483f5cdd8f4de8c70", + "47780d4dab77454ab898f6707d8d4168", + "6653b71e07bc488ebbb4ed5728564ccd", + "59d44fcd08114cb4aaeea768b1438bcf", + "18cf08eb051d48c9a5c0b6b827507b7f", + "fee935f9fb354a67a37d42641ff0d81e", + "22ec450031234856a304ccee34d452f4", + "4944567015cc46be83a8524c0542722a", + "b80d78f92da64255991b4fcfde98b1d4", + "e239b0fe8301409f9dd7e5e801949ec2", + "6e7533e6b43c4f1dbb1e0421b99fdc47", + "4be15c8712e340b3b9d9a3bd1c7c7516", + "337d98c0886948929a48411422a81ff8", + "83d8d49cf93c4af7bb3e3cfa3234c6c6", + "a1fb4ca7292e4cdc85b522248fdddaf6", + "ce26e74cc006450ca4e44bee2d14d80c", + 
"c01c7b35b1914ae681550421c0035a8c", + "c7966f356f80422abb3dcb45dbc541db", + "a86bb39581e1430a8314a616951af75e", + "446c7ef56bcc437388d4a99859c1b9fb", + "5c16ffe6a5504f2585aa6bc3132ff2ee", + "9d466ef4939c43f2846f22a5a21e5cd0", + "bec4a9e185074743848c04c4aff12037", + "cff028485bae4d96b4f7a48b738f6b61", + "c02af33357e64469aeb01a7af5a9ab37", + "fd0ff0c9933d4238a373c286f8e1dd5d", + "c517e6db93f04398b9a3ccc86e090499", + "e75b68d16bc443e39974922342952de9", + "0536d41437f54df38624a7d290e45325", + "eec3717367b348a388bb76eb6482ce25", + "e16f1ef5ee06493fac2d5871806a3b3a", + "24f7575f0f47498480b2a2f79f0d4ce5", + "c17fe53a4a2b4266a3cbd24c9f145cde", + "b4d715f23ada4ee48fdfd9af463f7124", + "7d9102b6a7b44e14809ecf8fa421ee70", + "8d640aa311f34b33b0967e128c138130", + "3680065e53494bad98e74fd7c81185dd", + "11fb0bacbcc44352b3b25d9f0923c332", + "7319ccdb3e3349328d6f9b4bb5445776", + "35017cc6cb484eeaa12714532e872f99", + "ae22d146f6f24981bde97896ad3d8b14", + "3feeca46c382431c9868e4852ca04d49", + "0e3b239635704ab391f1801b762b7f93", + "90d35d2eda00413eba027093309f6c31", + "d61446d3664a455baaada9761a1715be", + "92b7d7f81ebc441d8e6d6e20477aa37c", + "8b5b230489104f6bba63720fa9fad0ae", + "0afc992c54ec4a10a7f9fd3e45fa7761", + "4fbbb9ba6f4e44d6b2ccc5197dad5488", + "18e5f93ef3b64301b7c1548d17843d64", + "ee997f8eeccc4dd98aea71b930531cf5", + "e11a6cfa4615457090d4c87815fdb716", + "4d74f0ec93f54e09a22c3cb93a042570", + "31cdf14402f34270bdc1b1efd2a0d011" + ] + }, + "id": "TAit9IzsQLcc", + "outputId": "176a77ad-0245-4a3d-b9f3-e139de359da7" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "base_model = 
def extract_price(s):
    """Pull the predicted price out of a decoded model response.

    The text after the first occurrence of "Price is $" is searched for the
    first number (thousands separators and stray dollar signs are removed
    first).

    Args:
        s: Full decoded text (prompt followed by the generated tokens).

    Returns:
        The price as a non-negative float, or 0.0 when the marker or a number
        is missing. A leading sign is ignored: a price cannot be negative, and
        a guess <= -1 would make the evaluator's log(guess + 1) raise.
    """
    marker = "Price is $"
    if marker not in s:
        return 0.0
    contents = s.split(marker)[1].replace(',', '').replace('$', '')
    # Decimal form first ("12.99", ".5"), then plain integers ("12").
    match = re.search(r"\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0.0


def model_predict(prompt):
    """Generate a price estimate for one prompt with the quantized base model.

    Relies on the notebook-level `tokenizer` and `base_model`.

    Args:
        prompt: Item description ending with the "Price is $" cue.

    Returns:
        The price parsed from the model output by `extract_price`.
    """
    # Re-seed on every call so repeated evaluations give the same output.
    set_seed(42)
    # Follow the device the model was actually placed on (device_map="auto")
    # instead of assuming "cuda".
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(base_model.device)
    # Single unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(inputs)
    outputs = base_model.generate(inputs, max_new_tokens=4, attention_mask=attention_mask, num_return_sequences=1)
    # Decode prompt + completion: extract_price needs the "Price is $" anchor
    # that lives at the end of the prompt.
    response = tokenizer.decode(outputs[0])
    return extract_price(response)
# Helper class for evaluating model predictions

# ANSI colour escape codes, keyed by the colour names returned by Tester.color_for.
GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
RESET = "\033[0m"
COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}

class Tester:
    """Run a price predictor over the first `size` datapoints and chart the result.

    Each datapoint must expose a "text" prompt and a numeric "price". For every
    datapoint the absolute error, the squared log error and a colour bucket are
    recorded; `report` summarises them in the title of a scatter plot.
    """

    def __init__(self, predictor, data, title=None, size=100):
        """Store the predictor and data and prepare empty result lists.

        Args:
            predictor: Callable taking the prompt text and returning a number.
            data: Indexable collection of datapoints ("text", "price").
            title: Chart title; defaults to a prettified predictor name.
            size: Number of leading datapoints to evaluate (capped at len(data)).
        """
        self.predictor = predictor
        self.data = data
        # Callables such as functools.partial have no __name__; fall back to the type name.
        default_name = getattr(predictor, "__name__", type(predictor).__name__)
        self.title = title or default_name.replace("_", " ").title()
        # Never index past the end of the data.
        self.size = min(size, len(data)) if hasattr(data, "__len__") else size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Bucket a prediction as "green", "orange" or "red".

        Green: error under $40 or under 20% of the truth; orange: under $80 or
        under 40%; red otherwise. A truth of 0 is treated as an infinite
        relative error instead of dividing by zero.
        """
        relative_error = error / truth if truth else math.inf
        if error < 40 or relative_error < 0.2:
            return "green"
        elif error < 80 or relative_error < 0.4:
            return "orange"
        else:
            return "red"

    def run_datapoint(self, i):
        """Predict datapoint `i` and append its guess, truth, errors and colour."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint["text"])
        truth = datapoint["price"]
        error = abs(guess - truth)
        # log(x + 1) is undefined for x <= -1; score a negative guess as $0 in
        # the log metric so one bad prediction cannot abort the whole run.
        log_error = math.log(truth + 1) - math.log(max(guess, 0) + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)

    def chart(self, title):
        """Scatter model estimates against ground truth, coloured by bucket."""
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        # Diagonal = perfect prediction.
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)

        # Add color legend
        from matplotlib.lines import Line2D
        legend_elements = [
            Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8),
            Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8),
            Line2D([0], [0], marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8)
        ]
        plt.legend(handles=legend_elements, loc='upper right')

        plt.show()

    def report(self):
        """Compute mean error, RMSLE and hit rate, then draw the chart.

        Raises:
            ValueError: If no datapoint has been evaluated yet.
        """
        # Average over what was actually evaluated, not the requested size.
        count = len(self.errors)
        if count == 0:
            raise ValueError("Tester.report() called before any datapoint was evaluated")
        average_error = sum(self.errors) / count
        rmsle = math.sqrt(sum(self.sles) / count)
        hits = sum(1 for color in self.colors if color=="green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/count*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate the first `size` datapoints from scratch and report."""
        self.error = 0  # unused; kept so the instance attributes stay unchanged
        # Start clean so calling run() twice does not double-count results.
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Convenience entry point: evaluate `function` on `data` with defaults."""
        cls(function, data).run()
in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part6_ft_llama_qlora.ipynb)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb b/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb new file mode 100644 index 0000000..af4b5e7 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb @@ -0,0 +1,907 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 6)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- ➡️ Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# ⚙️ Part 6: Fine-Tuning LLaMA 3.1 with QLoRA\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab (A100)\n", + "- 🛠️ Requirements: 🔑 HF Token, wandb API Key ([Weights & Biases](https://wandb.ai))\n", + "- Tasks:\n", + " - Load and split dataset (Train/validation); set up [Weights & Biases](https://wandb.ai) logging\n", + " - Load quantized LLaMA 3.1 8B and tokenizer\n", + " - Prepare data with a collator for fine-tuning\n", + " - Configure QLoRA (LoraConfig), training settings (SFTConfig), and
tune key hyperparameters\n", + " - Fine-tune and push best model to Hugging Face Hub\n", + "\n", + "⚠️ I attempted to fine-tune the model on the full 400K dataset using an A100 on Google Colab, but it consistently crashed. So for now, I’m training on a 20K subset to understand the process, play with hyperparameters, track progress in Weights & Biases, and push the best checkpoint to the Hub.\n", + "\n", + "⏱️ Training on 20,000 examples took over 2 hours.\n", + "\n", + "The full model fine-tuned on the complete 400K dataset is available thanks to our instructor, [Ed](https://www.linkedin.com/in/eddonner) — much appreciated! \n", + "We’ll dive into that model in the next notebook — **stay tuned** 😉\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MDyR63OTNUJ6", + "outputId": "525372ce-f614-44f1-b894-80e289958197" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets transformers torch peft bitsandbytes trl accelerate wandb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import torch\n", + "import wandb\n", + "from google.colab import userdata\n", + "from datetime import datetime\n", + "from datasets import load_dataset\n", + "from huggingface_hub import login\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, EarlyStoppingCallback\n", + "from peft import LoraConfig\n", + "from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you
# Ensure you have set the following in your Google Colab environment:
# The Hugging Face token is read from Colab's secret store, then used to log in to the Hub.
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:
# %pip install -U datasets (for Google Colab)

HF_USER = "lisekarimi" # your HF name here!

# Dataset repo is derived from the Hub user name above.
DATASET_NAME = f"{HF_USER}/pricer-data"
dataset = load_dataset(DATASET_NAME)
train = dataset['train']
test = dataset['test']
split_ratio = 0.1 # 10% for validation

##############################################################################
# Optional: limit training dataset to TRAIN_SIZE for testing/debugging
# Comment the two lines below to use the full dataset
# NOTE(review): select(range(TRAIN_SIZE)) assumes the train split holds at
# least TRAIN_SIZE rows — confirm if the dataset is swapped for a smaller one.
TRAIN_SIZE = 20000
train = train.select(range(TRAIN_SIZE))
##############################################################################

# Sizes are computed on the (possibly truncated) train split.
total_size = len(train)
val_size = int(total_size * split_ratio)

# Validation = the first val_size rows, training = the remaining rows; the
# split is positional, with no shuffling at this point.
# NOTE(review): presumably the dataset was shuffled when it was published — verify.
val_data = train.select(range(val_size))
train_data = train.select(range(val_size, total_size))
# Sanity check: show how many rows ended up in each split.
print(f"Train data size : {len(train_data)}")
print(f"Validation data size: {len(val_data)}")
print(f"Test data size : {len(test)}")

PROJECT_NAME = "llama3-pricer"

# Run name for saving the model in the hub

# Timestamp plus dataset size. total_size counts the rows BEFORE the validation
# rows were split off, so it is larger than len(train_data).
RUN_NAME = f"{datetime.now():%Y-%m-%d_%H.%M.%S}-size{total_size}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"
# Bare expression: displayed as the cell output in the notebook.
HUB_MODEL_NAME

# Load from Colab's secure storage
wandb_api_key = userdata.get('WANDB_API_KEY')

# Load from environment variables (.env file) if running Locally (GPU setup)
# wandb_api_key = os.getenv('WANDB_API_KEY')

# Expose the key through the environment, then log in to Weights & Biases.
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

# Configure Weights & Biases to record against our project

LOG_TO_WANDB = True

# These environment variables are set whether or not LOG_TO_WANDB is true.
# NOTE(review): presumably they are read later by the trainer's W&B integration — confirm.
os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"
os.environ["WANDB_WATCH"] = "gradients"

# Start the W&B run under the same name that is used for the Hub model.
if LOG_TO_WANDB:
    wandb.init(project=PROJECT_NAME, name=RUN_NAME)
"511a4c6a898346acac9d98fd3a7cdf2c", + "26da7435a2614201a9e5b8087749f0e0", + "6054fa015ae44659beb7473c084c7b5b", + "3b9fc447a9ae4506a1edaf0fa449d9d5", + "6acef8f1820545ef90b22d90ac80427d", + "2a5cbad0b8fd45dc9ee25715b1015aef", + "86a9428f39be4d65a1e922bd9afb3800", + "96d919a1a7f14e91b8e6c91d855e36d5", + "82d7484aa2774015b7ea18d933afa9b6", + "b9d2d4f2c44a4d7cad2b3803c7f6e7be", + "9f3a176a6ae6426a8c1567a835da8680", + "006763d2301f4205a588adf5c19876a0", + "b44eb6596c3441bbaab288030f953a04", + "bf91666a0c054c79acb03d2e1bb38c37", + "f0185f1b4b23445c920a873eb63a9372", + "8e1ac15b677d4c21ad42ea1dda68fe05", + "87746d8d6d3d413ebb46b4e12fb74cc8", + "bb5ea1e92c434a46838f943648de87bd", + "1abcfcba332b40eb901d1331ed84f9bd", + "52fa5fcc629742619fa3105f73d90767", + "1bcc2d5771034c2dbc372031e83a2384", + "221cfaa2a5db4cf1ac399363c3589025", + "793f9bdc92a545519dd3279023e4ab50", + "55e25f5cc12f44f3a39fae501fccd060", + "59463b5e6286483394dedb602991ac95", + "fc95344ea44d40f28702360542afcff7", + "ffb3af537d6c41548ad88027505b04d6", + "6afcf0f6131d4dddbeda796e9c0c5bc5", + "93f65b3bc071453f86fe8f0f6c17d8fd", + "2ac9926ee4644232b43d84cfa95c584d", + "0c5a7738132b4f0f8b4810333b37c588", + "99d41ffa37134be9a57fe5e50a59b67d", + "50e71304ab4f42c29f1994fed9b595b8", + "76b4b0d63e524eb783429169a25be74e", + "441cfadbe4b446f4b61391b7be4d2865", + "6751f0c35b634d7c9b06c4e41f9ff851", + "6a5dc276bbf64bf9b5a99751068ee228", + "b3ac6055014642a285435f877d5651f5", + "e9137600b29c4ecaad4ef8bca5fd5f91", + "634afb9c1b8c4e29b3ec7b76a1108ae4", + "6be0ac91035548fbbe778e3d7fd58e7e", + "e8e9d5c979ac4afba526e38b6d0851be", + "a4ae8ca9c0e7478fbad3b9ed67bc21a2", + "faf3a64e316a43ddbac8ba14573c4eb4", + "a395885e39434f9f98246d0fb1c94c8f", + "d13552c90ead4804a4d5a21121f25536", + "c25b94002c2246a9aa7f6ed1e4a22cfa", + "e3892cf602cb4a49948f26cae1e7644c", + "bc290a324a7147c5b6a722acb41ed05a", + "2b556f5aa6324958ac6fe36bddf17909", + "67c6a0534b3a4345b9c11af1bffdfbf0", + "d767921bb23c485396282cb79a4d1836", + 
# Hub id of the base model to fine-tune.
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Reduce the precision to 4 bits
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

# trust_remote_code is deliberately NOT enabled: the Llama tokenizer ships with
# transformers, so there is no reason to allow code from the Hub repo to run.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse the end-of-sequence token as the padding token, and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load the quantized weights and let accelerate place them on the available device(s).
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto",
)
# Keep generation padding consistent with the tokenizer's pad token.
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB")
Price is $\" → masked (no loss)\n", + "\n", + "\"12.99\" → not masked (model is trained to predict this)\n", + "\n", + "So the model learns to generate 12.99 given the context, but isn’t trained to repeat or memorize the description." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2omVEaPIVJZa" + }, + "outputs": [], + "source": [ + "response_template = \"Price is $\"\n", + "collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DaOeBhyy9eS" + }, + "source": [ + "### 🧠 2. Define the QLoRA Configuration (LoraConfig)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0HKuVS_XR3cw" + }, + "outputs": [], + "source": [ + "LORA_R = 32\n", + "LORA_ALPHA = 64\n", + "TARGET_MODULES = [\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"]\n", + "LORA_DROPOUT = 0.1\n", + "\n", + "lora_parameters = LoraConfig(\n", + " r=LORA_R,\n", + " lora_alpha=LORA_ALPHA,\n", + " target_modules=TARGET_MODULES,\n", + " lora_dropout=LORA_DROPOUT,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\", # Specifies we're doing causal language modeling\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uLfFsfNQSBAm" + }, + "source": [ + "### ⚙️ 3. 
Set the Training Parameters (SFTConfig)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7PKXdhPXSJot" + }, + "outputs": [], + "source": [ + "# 📦 Training Setup:\n", + "EPOCHS = 1\n", + "BATCH_SIZE = 16 # A100 GPU can go up to 16\n", + "GRADIENT_ACCUMULATION_STEPS = 2\n", + "MAX_SEQUENCE_LENGTH = 182 # Max token length per input\n", + "\n", + "# ⚙️ Optimization:\n", + "LEARNING_RATE = 1e-4\n", + "LR_SCHEDULER_TYPE = 'cosine'\n", + "WARMUP_RATIO = 0.03\n", + "OPTIMIZER = \"paged_adamw_32bit\"\n", + "\n", + "# 💾 Checkpointing & Logging:\n", + "SAVE_STEPS = 200 # Checkpoint\n", + "STEPS = 20 # Log and evaluate every 20 steps (used for logging_steps and eval_steps)\n", + "save_total_limit = 10 # Keep latest 10 only\n", + "\n", + "\n", + "LOG_TO_WANDB = True\n", + "\n", + "HUB_MODEL_NAME = f\"{HF_USER}/{PROJECT_RUN_NAME}\"\n", + "\n", + "train_parameters = SFTConfig(\n", + " # Output & Run\n", + " output_dir=PROJECT_RUN_NAME,\n", + " run_name=RUN_NAME,\n", + " dataset_text_field=\"text\",\n", + " max_seq_length=MAX_SEQUENCE_LENGTH,\n", + "\n", + " # Training\n", + " num_train_epochs=EPOCHS,\n", + " per_device_train_batch_size=BATCH_SIZE,\n", + " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n", + " max_steps=-1,\n", + " group_by_length=True,\n", + "\n", + " # Evaluation\n", + " eval_strategy=\"steps\",\n", + " eval_steps=STEPS,\n", + " per_device_eval_batch_size=1,\n", + "\n", + " # Optimization\n", + " learning_rate=LEARNING_RATE,\n", + " lr_scheduler_type=LR_SCHEDULER_TYPE,\n", + " warmup_ratio=WARMUP_RATIO,\n", + " optim=OPTIMIZER,\n", + " weight_decay=0.001,\n", + " max_grad_norm=0.3,\n", + "\n", + " # Precision\n", + " fp16=False,\n", + " bf16=True,\n", + "\n", + " # Logging & Saving\n", + " logging_steps=STEPS, # See loss after each {STEP} batches\n", + " save_strategy=\"steps\",\n", + " save_steps=SAVE_STEPS, # Model Checkpointed locally\n", + " save_total_limit=save_total_limit,\n", + " report_to=\"wandb\" if LOG_TO_WANDB else \"none\", # the string \"none\" disables all reporting integrations\n", 
+ "\n", + " # Hub\n", + " push_to_hub=True,\n", + " hub_strategy=\"end\", # Only push once, at the end\n", + " load_best_model_at_end=True, # Loads the best eval_loss checkpoint\n", + " metric_for_best_model=\"eval_loss\", # Monitors eval_loss\n", + " greater_is_better=False, # Lower eval_loss = better model\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1q-a3LHDSoxQ" + }, + "source": [ + "### 🧩 4. Initialize the Fine-Tuning Trainer (SFTTrainer)\n", + "Combining everything" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 290, + "referenced_widgets": [ + "6753caf741414a4c8fa309978253c8cd", + "aeade430d57b4338910ad0c3645fd06a", + "eb7081b71cc14aff9b99dba8f9368def", + "8eb16171df804d06a02351f74bb28dc4", + "9d60a205ebda49ca88220cc4eec716ca", + "d8ff973b90374423b4b5e17a1937111c", + "4bf3bf107f2c4e28a58387c96916e97f", + "d66cb8c1829c439095f4691fa32d7b6e", + "567c8321685045c5a873b3b1edecdc96", + "96ff596facb94acab611201b4adac13f", + "de65507ce09a4ef4ad8f28d46d335acc", + "e40fe92fe9094a58b53f0eeb97d3d629", + "592615cc81624de5a9934f5671d6c188", + "fadf75d91df54f49acef3f178ea53ce3", + "5ccca8ab6cb94a88bb27bd482f7948a9", + "d74dcc2ef9b8442d9ae99db2a79e0c48", + "580ebfa370d34426933e8c7389872e2b", + "1187f05dc99641e9a68d9cf49216c370", + "7deffbba68ba4f018374bd6bec62dd18", + "d24cdc40a6a34d6eb0efbfde17505d6f", + "31d44a308b4b4557934ec887e0b6a817", + "76112ce6fdc4496dba783451efa28cfd", + "15a85e4a77484c9392b2e5cb8767b336", + "4524d775b9034a1f890673a9c005d123", + "5ab6a6b427f84ec685ac52f6ff0d63b5", + "427ee9e90a844313989f623aba124498", + "6d2b7c059e6b42afa955fe01bf38011d", + "5d821ed8ffe14927be799c4d31043a82", + "12f9fab59e9849dcb7b3b17c5674580f", + "dd4a2876db37476fa438e8758c855393", + "f115f97428764c53ac780131fd75bd17", + "1a1e0e562a844ed098e97ce8a62695ee", + "0a7ae7cc902243a5996f730f0fe05cdb", + "07205ea24c3f4959bf9ebd393f5c921d", + 
"723bb8342ac84eedabd91e3eef178967", + "28714d0cf3d84a48975c8ad31e29691d", + "dd1d90d76d914839a1dad1cddab2c09f", + "e2d55edf98784523bcbeaad0cc2be494", + "d00ecfa9dc44428b989ec1a9deb27eae", + "ba2717985bc342e9827f8901ef655b00", + "6669dc8f20e3461f93c95cef7a90b201", + "29cb36c1943c4e1b9898534aaf32bd37", + "14a1449c13a14afda16bc7c05b7fd840", + "259d315eb4584c699b1c738d411eab7e", + "a4bb13eb7cee4f87b0e3e1a3a1be18e7", + "14d8a699a92044cda33802d96aaa41a2", + "d345350fd5ad4a028fbbc45cfc9f6db3", + "6953210353f840d59457fc54f4f8b829", + "d6cd9e1196f04ecbba83dc0b446b2c65", + "9e380ef863204da5863c9b6e7a2c8340", + "1d1bb803831d46309619f6a0c51c2eeb", + "6a50aaf7ad304a5aa3f29113121e8fe0", + "7a573a39c2b245f5a84626d951584f67", + "a57e66367d4245f6bcd4ad0463535583", + "d6f3327d39a34ec5a44d976f239a61ce", + "8f450df9f161409a8102c1f0b63edad8", + "95d932d12cb8442da17adb8e9782c40c", + "41c5f295b45f4828a9327b699b85ca01", + "9e4f3fd6bf7749f88ccd7ba65dd9446f", + "a8f8cb0d9fb14f30a537977f3d51a2c4", + "4e9e4ed0f2db4d7ba5a5bb0d00676a0c", + "1fe2bab9c9aa4de48e6e2512f9a7d0a1", + "d93ac5affccf404fa3916e7f3dd62943", + "92346fc65f48493d80198ac6d7adf4d8", + "647bfb2a24cc44a0adaf69ced8e99213", + "5c96424cff314aa484e4bc905bcbd761", + "cec2fcfb30194d5ab8c0a3868bad3598", + "35df7031c4964cef9c53bba6eabbe91d", + "e15c772e14264c9889e6dae34015e04b", + "e85b65cb497c48c2b844ae3e5d9efc60", + "52c8495d46ca4a3c8c6694a700d05e95", + "3db6d8a5ce2a40daaae6714807a27997", + "051d74df7ef1468aa968cac5792e7b00", + "75838a7c887545ff9fbbf5887a1336bc", + "59f698c1829148ac90edda008d5c6f69", + "35921436c69643aab792bd1333c749ef", + "2dd51cc6033746e1a8def460e5e51ff5", + "a8a3e5973ee5441087d10dfb17bfa1d6", + "64c3b3c02e844df6bfd3acf1ee23d765", + "83016eccdd7f4dedab9d3ea6e6852977", + "9d4c5a62214f4649b77365349ae4ac88", + "07cb9756d1814a7ba7fb49cccb2763cb", + "492454ad524742bd8bb3f5c3d5b37feb", + "e98053f6b7f045da812088d1e76d3a31", + "f2aeb3ae99cc4b7ca97fb959df1150ad", + "f92e18b6ab0147b1b428724f5155ca61", + 
"14356b2447e349ee8478478eb231fa81", + "f244a7e331d941f5a99712dcbc5550ea" + ] + }, + "id": "fCwmDmkSATvj", + "outputId": "2b4adc75-e0db-4e0b-c90b-9f9ff2dfd3c6" + }, + "outputs": [], + "source": [ + "# The latest version of trl is showing a warning about labels - please ignore this warning\n", + "fine_tuning = SFTTrainer(\n", + " model=base_model,\n", + " train_dataset=train_data,\n", + " eval_dataset=val_data,\n", + " peft_config=lora_parameters, # QLoRA config\n", + " args=train_parameters, # SFTConfig\n", + " data_collator=collator,\n", + " callbacks=[EarlyStoppingCallback(early_stopping_patience=5)] # Early stop if no val improvement for 5 steps\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vHz6JA5_XJ07" + }, + "source": [ + "### πŸš€ 5. Run Fine-Tuning and Push to Hub" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GfvAxnXPvB7w", + "outputId": "d351d89a-b3d7-4e2b-fee2-5ba2e929837e" + }, + "outputs": [], + "source": [ + "fine_tuning.train()\n", + "print(f\"βœ… Best model pushed to HF Hub: {HUB_MODEL_NAME}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_train_eval_loss_steps.png?raw=true)\n", + "\n", + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_train_eval_loss_wandb.png?raw=true)\n", + "\n", + "This chart shows training loss vs evaluation loss over steps during fine-tuning of Llama 31 8B 4-Bit FT (20K Samples).\n", + "\n", + "- Blue line (train/loss): Decreasing overall, with some noise. Final value: 1.8596.\n", + "- Orange line (eval/loss): Smoother and consistently lower than training loss. 
Final value: 1.8103.\n", + "\n", + "- No overfitting: Eval loss < train loss throughout — a good sign.\n", + "- Stable convergence: Both curves flatten around step 500, suggesting the model is reaching training stability.\n", + "- Final eval loss is low, indicating decent generalization to unseen data.\n", + "\n", + "This fine-tuning run looks healthy. We can likely push further by training on more data, e.g. the full 400K-sample run." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 938 + }, + "id": "32vvrYRVAUNg", + "outputId": "bb4ab0f6-c390-48f3-a71c-2d259bb0ec0b" + }, + "outputs": [], + "source": [ + "if LOG_TO_WANDB:\n", + " wandb.finish()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_run_summary_qlora_llama.png?raw=true)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyKZ0r38IfT3" + }, + "source": [ + "Now that our best model is pushed to Hugging Face, let’s put it to the test.\n", + "\n", + "🔜 See you in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part7_eval_llama_qlora.ipynb)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb b/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb new file mode 100644 index 
0000000..bfe78d1 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb @@ -0,0 +1,739 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GHsssBgWM_l0" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 7)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- ➡️ Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🧪 Part 7: Evaluating the Fine-Tuned LLaMA 3.1 8B (Quantized)\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab\n", + "- 🛠️ Requirements: 🔑 HF Token\n", + "- Tasks:\n", + "  - Load the tokenizer and the quantized base model\n", + "  - Load the PEFT adapter for the fine-tuned weights\n", + "  - Run evaluation — the moment of truth!\n", + "\n", + "🔔 **Reminder:** \n", + "As mentioned in Part 6, I fine-tuned the model on only 20K samples. 
\n", + "In this notebook, we’ll evaluate both this model and the full 400K-sample version fine-tuned by our instructor.\n", + "\n", + "---\n", + "πŸ“’ Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MDyR63OTNUJ6" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets transformers torch peft bitsandbytes matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import math\n", + "import torch\n", + "from huggingface_hub import login\n", + "import torch.nn.functional as F\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n", + "from datasets import load_dataset\n", + "from peft import PeftModel\n", + "import matplotlib.pyplot as plt\n", + "from google.colab import userdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WyFPZeMcM88v" + }, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "30lzJXBH7BcK" + }, + "outputs": [], + "source": [ + "# Helper class for evaluating model predictions\n", + "\n", + "GREEN = \"\\033[92m\"\n", + "YELLOW = \"\\033[93m\"\n", + "RED = \"\\033[91m\"\n", + "RESET = \"\\033[0m\"\n", + "COLOR_MAP = {\"red\":RED, \"orange\": YELLOW, \"green\": GREEN}\n", + "\n", + "class Tester:\n", + "\n", + " def __init__(self, predictor, data, title=None, size=250):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or 
predictor.__name__.replace(\"_\", \" \").title()\n", + " self.size = size\n", + " self.guesses = []\n", + " self.truths = []\n", + " self.errors = []\n", + " self.sles = []\n", + " self.colors = []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error<40 or error/truth < 0.2:\n", + " return \"green\"\n", + " elif error<80 or error/truth < 0.4:\n", + " return \"orange\"\n", + " else:\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " datapoint = self.data[i]\n", + " guess = self.predictor(datapoint[\"text\"])\n", + " truth = datapoint[\"price\"]\n", + " error = abs(guess - truth)\n", + " log_error = math.log(truth+1) - math.log(guess+1)\n", + " sle = log_error ** 2\n", + " color = self.color_for(error, truth)\n", + " # title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " # print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n", + "\n", + " def chart(self, title):\n", + " # max_error = max(self.errors)\n", + " plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Model Estimate')\n", + " plt.xlim(0, max_val)\n", + " plt.ylim(0, max_val)\n", + " plt.title(title)\n", + "\n", + " # Add color legend\n", + " from matplotlib.lines import Line2D\n", + " legend_elements = [\n", + " Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8),\n", + " Line2D([0], [0], 
marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8)\n", + " ]\n", + " plt.legend(handles=legend_elements, loc='upper right')\n", + "\n", + " plt.show()\n", + "\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for color in self.colors if color==\"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n", + " self.chart(title)\n", + "\n", + " def run(self):\n", + " self.error = 0\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, function, data):\n", + " cls(function, data).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# πŸ“₯ Load Dataset" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "61f42f612e98467684716cc7421c7554", + "a7e864c2ae21482e8bcdbc42a5a65309", + "63405c5e47da4652b052ee6099ead31e", + "0864a38b1c494308a07defced89f4fe3", + "8f089946a97d4becb3ff06b7a65595a2", + "42b865ac9e4f4ecaa475c4d69929e401", + "3478290afe1d48268c7c07206c212eda", + "f21c0db9205f4c40a2f9ea1ddd66b59e", + "4604f38122454bc1b1826311a326eb12", + "6e2b95e33cab4fe9b9f555195b634fac", + "b8f0f357a61c4502962f385291c3bac8", + "fa49b7e56b054faca67334e08bbf622c", + "243d84401ba24360a42c2636d7984772", + "bbcf01edcbcd425b9ca1e61e80f6df4f", + "17b41698c33044c7942e66e63c5c2d2d", + "14dfccde2f6a47679cea42ce965b6ef2", + "6a1570c8980b4d5ebac78348f79c4f1b", + 
"44f1922676f3417fb7baccd92bf53cea", + "176b023546bc4053a4d484205d7ab200", + "b02018254c4b4fb680e382974380c331", + "766aba35ebf54996990e075e4f692f96", + "24ceffd3b8c64e5f983e52d743ebef8d", + "5b9076b6c05a4454a7233302114b9d8c", + "4bfbd393271844de825a53c7d639fa60", + "3313091548bf414fabf84f5aa2c85d14", + "f98c7fe4ad6d4649a7a104f973992be0", + "fd1eb06d0aa64ba59ae9bb214f2c94ed", + "24237203b2c44709b20ca84b95387849", + "7910e6a4881a43638c4e91dd0f024092", + "f22dad57ee324ca8b927f9a3b8cc6edc", + "20a702b1ccbe499eabf70af974561417", + "48f72254ce6f408c94bf56a3919c032e", + "6bf00cd26256489fb209b8b51ca9fb0e", + "da3c453facaf41b6bc89d311d9f1ce74", + "78487c1a13e84e7bb35a72a07ad9b681", + "3866fe39fcc34120a0b4c4b36c8eaa6c", + "54de8e445909429f9d7ca9ad02e8f190", + "eeda8994cb8d46cc9d5c2212907ab869", + "b670675ee9bc4689a34f997d0da13b82", + "56727a21bb4648fe8ae46d3a61b39f4a", + "da89c856fbf746b496d37cbef92305b9", + "2f4ba348ef7246af8b1cd04352bcbd1d", + "0d86b4a93411494eb8e725440e393cff", + "203c4888674c46bba1033639ad4286a2", + "005dac04aacb4955ae079d36bfc4cd19", + "68ff796bdee44aa380324374ae38fd25", + "411691dce3f1457cb3ee9e8ad652d61d", + "f0fc209cb9e74d0ca3c0c9b14b1450e0", + "6e2155c3ad3243508dff34919eecd0a2", + "68891d88fe7e417abbd508d2089e7960", + "8e1ab77817bc4ec2835b195a0beb1096", + "c638e3a09f6b4caaa078e242b010744e", + "ee9abd78adb54984868ebee19f638e25", + "8280e432938b4e9794c95e47bb9c02fa", + "abdd2ff8028b432091434805f81c455c" + ] + }, + "id": "cvXVoJH8LS6u", + "outputId": "6308b124-a922-4e82-fb6a-5933d3c324e0" + }, + "outputs": [], + "source": [ + "DATASET_NAME = \"lisekarimi/pricer-data\"\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xb86e__Wc7j_", + "outputId": "8b699099-7414-4663-fab1-d069d3ec3d35" + }, + "outputs": [], + "source": [ + "test[0]" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "qJWQ0a3wZ0Bw" + }, + "source": [ + "## πŸ“₯ Load Tokenizer and Model\n", + "The fine-tuned model (PeftModel) only holds the LoRA adapters, so it requires the base model to apply them correctly." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 401, + "referenced_widgets": [ + "aee2cb6b13d64f1dab9f8190a274bbc0", + "547a2807263e4295af11da5a43ccf5b7", + "00b57ab6c0c44e39ad6fa27b7e5a085b", + "d51c826dc6d749b38ce7e5fdfc730086", + "f276602665c148999240ef916aa8a9c8", + "9d83d7056aed43a59d82955bdb8f272d", + "7a71aedc0f49430ba7c71040c5fa2529", + "108880a9a7bb4a73837889ad2a25fd77", + "0163275024a041e2bc9fba947c371269", + "555a494cbcda41e79ee4584a8122774b", + "5dfbe2af8afc43c691c34c52a47c9790", + "25edb5ab02c3402998b75cffc13d0a55", + "9a4f0361323540aa8428054a0d98ecb3", + "cf149d1eceae43a9808e142fbfe5d4ff", + "de86c9338690424fa0052e5b055cea88", + "2acb3368945a48aeaf9fbc6d22e9238d", + "4c4c4b1507814037bcea0519ec43ba26", + "6d37385e79904b7ca267ad165774f962", + "b14a5f0f71094aa98403edd429cb882e", + "31b28b6183c644f9b5601208a1f72499", + "d3cddc62e0fb4256bf4c74f6a59e686b", + "82cb2192839e451292b27a186daaa7c1", + "2e038c429eed4abdae8d27a7226d7298", + "364c4658aba64512a1f50cdad9cc12f8", + "fde7b1ab1e224fec8e9b761e703b53dd", + "ad5db9c88ce64f73992d2e274ca1206a", + "0e7ada829b22485ca7a628d2c464f3f1", + "ec4f7d2076db4f6a856ab0d5e8edffbd", + "3f00114026a4417db1b142e5bcb7a695", + "e4e9cf32b99848baa6a587fb235ce6b5", + "a109b5ee80574e40a14fa1e186f4f9f4", + "af569da703694c27aa9ca2ddce6c4923", + "886bb94abf2c437eb8505222c4336e85", + "f668156d681e47f39e553f127a44261d", + "9bb3d0deaac6439e9ad67c2bc0565ff4", + "762b36fde5ac4a2982152f3babfa3ed9", + "141911ee360d42ab8dd3b7fa3563bbf0", + "340eae69eeaf4e458e6d8134018f4ad4", + "3226e3a8c4564f6fbd6ffb3eeb7b45e7", + "6ed52680f866470da1e8d4a48b6e42fb", + "6d8a206edb824c5eb06c803e8cab14de", + 
"86fd4472a7a84940a54f24104689a74d", + "916c0e20af5e4b78a5e86532b0c9a3e8", + "62dd475c101e4859a48ee57a272f71bc", + "a8b7185a12c94adca0e63563d7df3ce4", + "47d57186838d466fb91b6666df85d1b4", + "9d37814d818c466c90892bf1f6e9a190", + "b5fdba30791649a792d192a131890a4e", + "789fe6f5489345c6a8b6a889d20e0ca0", + "5ce12a0983bb49f1a871598a6b9a0a13", + "d9eb89d218a44f21bb4447040e5c8925", + "b04aaa7931e74297a55bca3ebf4ded1d", + "837708f48ded4d78b7ad2e0dc6464e9c", + "32236e0d0b3e46e4b2c26b7ccb63c89e", + "499acde0cedf4ea1a90415f98660aaa5", + "840d3e7824944889ac2091b35f0c17c0", + "08f2fae4688b45729d8f5bf53837e56d", + "133bb5607eb0457888b1fe4e8d3fab3e", + "46bfe5feb9074050b556d804a544140d", + "4c3b0c2d04d24ec6abe8acbadb420712", + "eda1fcca6987495b87cf2206f93a0ecb", + "00b803cf92754db1bbea8ca909e5ccef", + "17e17b928555462abfbfa4caf7992427", + "35f90fa89e8842cdaa487b59da45b3e8", + "2887ef88074c4591b710688fa76329bf", + "0a0c5f00b3cc477e8b7e06550fc6f1cb", + "3b079fe81b7b44d796c531bec1754637", + "e82f8ac6e8eb4ed6a6743e10b8b99904", + "1f7de1e2970c4c8fbfe1ab400297e1a7", + "7ea0d8782a1f4cca9a64b95fe47e8a2e", + "689b49d52b8f4efb94f80d76a0fefab3", + "2005939305c442f7bed3b83ea16e13b1", + "1a6f2631e29444818fdbd9a0de265367", + "6bfc89e091a5448d94d2ea559ce43a21", + "bfc12d40caf4481280888506dfa01505", + "a1fb82d5761843a49a0993ff937cb40d", + "4c9c567918ee478a817b51e2a204d915", + "305623f276ba45e5a57727d1829158e1", + "b2722e271f78405b9151804ffc522530", + "963435e51a7a4ce98510c0372cd05030", + "d394cfc6af384a39b87c72ac6a3788d9", + "2c621a7a90ed4bfd8b52cea9c79e11c1", + "59ac0bb5c046448fbf16a27d2c3205f8", + "7617f5670879416d9dbc2dabda76ef4d", + "b32d6d6ff5dd4ac4adfb063205111707", + "38f3a7159fc34d89bc18e4225473615d", + "2a2c386e432f429f86c303d71472b480", + "ece25eb325004ae48ec5ec00055dd845", + "68e2b37bbd9a44f8a6032526acbf9ea6", + "3af191957e3f453ba803a1c01d6969ae", + "29dba394a6664e0f8984bcb966ccf19b", + "d84373a3f97245ae94bfb666c7e93a17", + "9f917250ccbf4078a90fda1eec71c6f4", + 
"8171dd4382d24f0a83484fbf967fec03", + "6f97606a500548e980c6481d756c72eb", + "6d1054047d4645a69c272484fd9e0c04", + "7fd14d942d2246bf8df28eca28e13fb2", + "0dabd208524f426bb5c643791e736413", + "368dea7bbf144cf0a667493cb23bddab", + "d6b14f8e43754283ad96543c4c1ffee6", + "f78562ef15524795bb9be326dcaab502", + "b01c8091b96444f687a49c5c51b5faf7", + "baab647e635a46ababa58993965a8159", + "25d9a9b78d554f8fbe92d7e805640c3b", + "95726f4b9bc34434b9d00fcdfe2ff87e", + "a7b835a668ef40c986a6fd51e464d1f4", + "188cac6192fc4b91be3ca5b01bab1d91", + "3537ef715f3447388625ee606555bb85", + "322ca0ccce644c48a2a0f4b44a38776d", + "cc3726d026594cb6ac2d6bafb16562ac", + "f48cc4a0a5d041cf9391a99353ff46af", + "05134ca3a9954341951ff958ff30fe0a", + "3a6aa623f1dd41b8940a41b509fa7500", + "fd58111bb44347b8bdcb984a0e86f9b7", + "c16cfb96177640a991c5509e652c85b9", + "adc0ffacba0846fabd76ed7955397077", + "e074da8f28d84ec891f22e30b86fb954", + "0b53df078f4a4a259b677ccccbdf46cd", + "954d5fa3b18a49589717cfc31fb58779", + "af0beb46b198458794c85803fe5af47f", + "c7322d41ae4c4068880521a136e923b4", + "391d834aa8734d7b9a97c03cab5e1e7d", + "5d779fc6bb1244449a68cf62dfd15698", + "197ca7f2357a4a2c89f5f3da3844c606", + "df4d22e6876b4c0082a7ace3281ff4e5", + "28d44cfae7de4b62be11020d9015f92c", + "3e8d7274ee3a4dfbbdd44ea0b2cd61b6", + "fa768ce193b94a4882a1e796e69cffea", + "c37a4882e4474f8690c4b479baf2d785", + "68a033bbcb4d4774bdb115e09d78365b", + "10b5e7970aa04bd6b3384aa645c48d92", + "f838b073dd254bb091a7db7175cd2ce8" + ] + }, + "id": "lAUAAcEC6ido", + "outputId": "b2983922-5036-4083-8cba-0cb3f51fbc51" + }, + "outputs": [], + "source": [ + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "\n", + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True, # Reduce the precision to 4 bits\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")\n", + "\n", + "# Load the Tokenizer and the Model\n", + "\n", + "tokenizer = 
AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=quant_config,\n", + " device_map=\"auto\",\n", + ")\n", + "base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2RJ0G-WRJGMK" + }, + "source": [ + "## 🧪 Load and Evaluate the Fine-Tuned Model with PEFT Adapters" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 20K Sample Fine-Tuned Model" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "f0c0a20172294f77a0306801f8d76fb7", + "f68ee0810c2a4ac087ac6ece5279fb09", + "8aa12b380191454ebf55e8b42d0e0f2b", + "63f6cfa30a274ee3835671d8e39a85ef", + "0b980946a50d4248a4c63ef117fc2e8f", + "18283c6dee9447ddaca34ad267773e48", + "a7d10d9147df4adebf913e3023c2a3a4", + "5886ca455d4d4aefa617478f4f69a3ca", + "8c0e83bce4f74e7ba337fc9af5b977b8", + "00dbc32bdb0440c0bc3ba2cc6677b04c", + "243e6d8479ac4958a8d877e28f9b514a", + "10b7df1ecfab4e5cb146932fc4fb2c17", + "07c6fd1fe1ac442dbeb7037161841b78", + "88adf6ab3f3e476fa66ad22e9ff49aa8", + "fe522e9cee55448a9c13a5daaad5e7e7", + "4b1b9e5a67e54a3b90f2c113355e735a", + "5cdbdf93af9344ccabd7c3f236446541", + "c4af3ca6696d4fcd9b831d825456c7fa", + "525b1673c902412db32691056d49fd35", + "42de37b9a74143b4a851a178c484a706", + "f5f42d9201dc4fbaaa9c684fdb748d4a", + "10a0e99256a149a0a94ff652a4fd259a" + ] + }, + "id": "R_O04fKxMMT-", + "outputId": "06fc64f8-3407-460b-e093-0293e958915e" + }, + "outputs": [], + "source": [ + "# Load lisekarimi model (trained on 20K datapoints)\n", + "\n", + "FINETUNED_MODEL = \"lisekarimi/llama3-pricer-2025-04-08_18.44.04-size20000\"\n", + 
"fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL)\n", + "print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")\n", + "fine_tuned_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Je5dR8QEAI1d" + }, + "outputs": [], + "source": [ + "# Gets top 3 predicted tokens from the model\n", + "# Filters valid numeric outputs (prices)\n", + "# Returns a weighted average based on token probabilities\n", + "\n", + "# This code would be more complex if we couldn't take advantage of the fact\n", + "# That Llama generates 1 token for any 3 digit number\n", + "\n", + "top_K = 3\n", + "\n", + "def improved_model_predict(prompt, device=\"cuda\"):\n", + "    \"\"\"Estimate a price from the model's next-token distribution.\n", + "\n", + "    Takes the top_K most probable next tokens, keeps those that parse as a\n", + "    positive number, and returns their probability-weighted average as a float.\n", + "    Returns 0.0 when none of the top_K tokens is a positive number.\n", + "    \"\"\"\n", + "    set_seed(42) # Reproducibility : same prompt = same o/p every time\n", + "    inputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n", + "    attention_mask = torch.ones(inputs.shape, device=device)\n", + "\n", + "    with torch.no_grad(): # Do not track gradients during inference\n", + "        outputs = fine_tuned_model(inputs, attention_mask=attention_mask)\n", + "        next_token_logits = outputs.logits[:, -1, :].to('cpu')\n", + "\n", + "    next_token_probs = F.softmax(next_token_logits, dim=-1)\n", + "    top_prob, top_token_id = next_token_probs.topk(top_K)\n", + "\n", + "    prices, weights = [], [] # weights = corresponding probabilities\n", + "\n", + "    for i in range(top_K):\n", + "        predicted_token = tokenizer.decode(top_token_id[0][i])\n", + "        probability = top_prob[0][i]\n", + "\n", + "        try:\n", + "            result = float(predicted_token)\n", + "        except ValueError:\n", + "            result = 0.0 # Non-numeric token: excluded by the > 0 filter below\n", + "\n", + "        if result > 0:\n", + "            prices.append(result)\n", + "            weights.append(probability)\n", + "\n", + "    if not prices:\n", + "        return 0.0 # Single float, same type as the normal return path\n", + "\n", + "    total = sum(weights)\n", + "\n", + "    weighted_prices = [price * weight / total for price, weight in zip(prices, weights)]\n", + "\n", + "    return sum(weighted_prices).item()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "t_GHfTwHXD5f", + "outputId": "056b0fc2-5632-4be8-ee24-b6bcefe14ab9" + }, + "outputs": [], + "source": [ + "improved_model_predict(test[0][\"text\"], device=\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 718 + }, + "id": "W_KcLvyt6kbb", + "outputId": "fba4200d-b911-467b-ab3c-17b78aa3b408" + }, + "outputs": [], + "source": [ + "Tester.test(improved_model_predict, test)" + ] + }, + { + "attachments": { + "0dcb25a7-83fa-4313-a94f-d3a56a0f07bc.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](attachment:0dcb25a7-83fa-4313-a94f-d3a56a0f07bc.png)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 400K Sample Fine-Tuned Model" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "dd1b57e03f2641d3b702f2cc66942b8f", + "e1d477dccbfc44a8a6da301486180e82", + "c312a5111a284c3db88f22290869c023", + "ce118d8b8146497f9c7fdd3b38188e72", + "bc46c271637341bb82d6b87df22ab2af", + "602adf3242f54731938b68d3cf68465e", + "39fae5e74834421795729a259a046fb8", + "0618d8626e2e46cb9a17f86444de3c48", + "1cd43b5b2fe445088c84e19773ad861e", + "f70a29870ab34f34a1900b2df2bf177e", + "41a96c5e35a44b898b872c189f531d3a", + "0a524a73d5d6478db81256371bf2bc9b", + "275f6179dc624bceaa5d0639fe0b1b00", + "79c41b26746344bc9a220f2376360110", + "287a6430766c44e5a71dda1048fa2a2c", + "3bbe1a454a854747a96fe83e91d6cb3c", + "8a93759afe21414fb0d6684f0a591d60", + "a3d76b3ce67a495db861bac80cfc0864", + "8fc794262ed14fc785c8f06e734c57d4", + "7dc967baa0e7427bb66cf3e26849d508", + "2d7a6dbd15304347a37dbfb6e5ec7203", + 
"288393e05947444bad11034071015baf" + ] + }, + "id": "Kl6n_0sAbU0g", + "outputId": "2fb53efb-da22-4c29-a594-c2cf5a079388" + }, + "outputs": [], + "source": [ + "FINETUNED_MODEL = \"ed-donner/pricer-2024-09-13_13.04.39\"\n", + "REVISION = \"e8d637df551603dc86cd7a1598a8f44af4d7ae36\"\n", + "fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)\n", + "print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")\n", + "fine_tuned_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 718 + }, + "id": "R0YlorBhbeSE", + "outputId": "f42de9bf-d45a-4d2d-c218-fe000d716e54" + }, + "outputs": [], + "source": [ + "Tester.test(improved_model_predict, test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "πŸŽ‰ And there it is β€” the open-source, quantized, and fine-tuned model outperforms the rest. πŸ™Œ \n", + "\n", + "πŸ“˜ We'll continue in [the next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part8_summary.ipynb) with a final wrap-up and summary of key insights.\n" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week7/community_contributions/lisekarimi/09_part8_summary.ipynb b/week7/community_contributions/lisekarimi/09_part8_summary.ipynb new file mode 100644 index 0000000..f7983a4 --- /dev/null +++ 
b/week7/community_contributions/lisekarimi/09_part8_summary.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "GHsssBgWM_l0" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 8)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- ➡️ Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🧪 Part 8: Summary & Leaderboard\n", + "\n", + "![](https://github.com/lisekarimi/lexo/blob/main/assets/09_ft_leaderboard.png?raw=true)\n", + "\n", + "# 🥇 The winner is the LLaMA 3.1 8B (4-bit) fine-tuned on 400K samples \n", + "\n", + "LLaMA 3.1 8B (4-bit) fine-tuned on 400K samples is outperforming even the big guy GPT-4o — with the lowest error and highest accuracy (75.6%).\n", + "\n", + "RAG + GPT-4o Mini also did well, proving that retrieval adds real value.\n", + "\n", + "On the other hand, traditional ML models and even human guesses gave weaker results and fell behind the top models.\n", + "\n", + "💡 As we’ve seen, a **well-tuned open-source small model** can do amazing things on a focused task — sometimes even better than large, closed models.\n", + "It’s not about size — it’s about fit, focus, and fine-tuning.\n", + "\n", + "# ✨ Conclusion\n", + "What a journey! From classic ML to state-of-the-art LLMs, from embeddings to retrieval and fine-tuning — we explored it all to answer: who predicts prices best?\n", + "\n", + "Thanks for following along — see you in the next challenge! 
🚀\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file