Merge branch 'main' of github.com:ed-donner/llm_engineering
41
week8/community_contributions/pricer_test/README.md
Normal file
@@ -0,0 +1,41 @@

# Run Continuous Integration (CI) Tests on Modal

## Unit testing

The unit test strategy follows [this example repo](https://github.com/modal-labs/ci-on-modal).

## Usage

All commands below are run from the root of the repository (this directory).

_Note_: I removed the Modal decorators from the `pricer.ci` module to be able to run the unit tests.
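
With the decorators removed, the suite can also be run locally; a minimal sketch, assuming the Python dependencies are installed and a CUDA device is available (the mocked `price` test still moves tensors to `cuda`):

```bash
pip install -r requirements.txt pytest
pytest tests/ -vs
```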

### Run tests remotely on Modal

```bash
modal run pricer.ci::pytest
```

On the first execution, the [container image](https://modal.com/docs/guide/custom-container)
for your application will be built.

This image will be cached on Modal and only rebuilt if one of its dependencies,
like the `requirements.txt` file, changes.

### Debug tests running remotely

To debug the tests, you can open a shell
in the exact same environment that the tests are run in:

```bash
modal shell pricer.ci::pytest
```

_Note_: On the Modal worker, the `pytest` command is run from the home directory, `/root`,
which contains the `tests` folder, but the `modal shell` command will
drop you at the top of the filesystem, `/`.

To run the tests from inside the shell:

```bash
cd /root
pytest
```
100
week8/community_contributions/pricer_test/pricer/ci.py
Normal file
@@ -0,0 +1,100 @@

from pathlib import Path

import modal

ROOT_PATH = Path(__file__).parent.parent

image = (
    modal.Image.debian_slim()
    .pip_install("pytest")
    .pip_install_from_requirements(ROOT_PATH / "requirements.txt")
)

app = modal.App("pricer-ci-testing", image=image)

# mount: add local files to the remote container
tests = modal.Mount.from_local_dir(ROOT_PATH / "tests", remote_path="/root/tests")


@app.function(gpu="any", mounts=[tests])
def pytest():
    import subprocess
    subprocess.run(["pytest", "-vs"], check=True, cwd="/root")
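
# This function is the CI entrypoint; from the repo root it is invoked with
#   modal run pricer.ci::pytest
# (see the README above).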

secrets = [modal.Secret.from_name("huggingface-secret")]

# Constants

GPU = "T4"
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
MODEL_DIR = "hf-cache/"
BASE_DIR = MODEL_DIR + BASE_MODEL
FINETUNED_DIR = MODEL_DIR + FINETUNED_MODEL

QUESTION = "How much does this cost to the nearest dollar?"
PREFIX = "Price is $"
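
# For reference, the derived constants above resolve to (worked out from the
# definitions; no extra configuration involved):
#   PROJECT_RUN_NAME -> "pricer-2024-09-13_13.04.39"
#   FINETUNED_MODEL  -> "ed-donner/pricer-2024-09-13_13.04.39"
#   BASE_DIR         -> "hf-cache/meta-llama/Meta-Llama-3.1-8B"
#   FINETUNED_DIR    -> "hf-cache/ed-donner/pricer-2024-09-13_13.04.39"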


class Pricer:

    def download_model_to_folder(self):
        from huggingface_hub import snapshot_download
        import os
        hf_token = os.environ["HF_TOKEN"]  # assumes the Modal secret exposes the token as HF_TOKEN
        os.makedirs(MODEL_DIR, exist_ok=True)
        print(f"Using this HF Token: {hf_token}")
        snapshot_download(BASE_MODEL, local_dir=BASE_DIR, use_auth_token=hf_token)
        snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_DIR, use_auth_token=hf_token)

    def setup(self):
        import os
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
        from peft import PeftModel

        # Quant Config
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )

        # Load model and tokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(BASE_DIR)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"

        self.base_model = AutoModelForCausalLM.from_pretrained(
            BASE_DIR,
            quantization_config=quant_config,
            device_map="auto"
        )

        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_DIR, revision=REVISION)

    def price(self, description: str) -> float:
        import os
        import re
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
        from peft import PeftModel

        set_seed(42)
        prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        attention_mask = torch.ones(inputs.shape, device="cuda")
        outputs = self.fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
        result = self.tokenizer.decode(outputs[0])

        contents = result.split("Price is $")[1]
        contents = contents.replace(',','')
        match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
        return float(match.group()) if match else 0

    def wake_up(self) -> str:
        return "ok"
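
# The parsing tail of price() can be sanity-checked without a model; a worked
# trace of the same logic on a hypothetical decoded result:
#   result   = "...\n\nPrice is $1,234.99"
#   contents = result.split("Price is $")[1].replace(",", "")   -> "1234.99"
#   re.search(r"[-+]?\d*\.\d+|\d+", contents).group()           -> "1234.99"
#   float(...)                                                  -> 1234.99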
101
week8/community_contributions/pricer_test/pricer/items.py
Normal file
@@ -0,0 +1,101 @@

from typing import Optional
from transformers import AutoTokenizer
import re

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
MIN_TOKENS = 150
MAX_TOKENS = 160
MIN_CHARS = 300
CEILING_CHARS = MAX_TOKENS * 7

class Item:
    """
    An Item is a cleaned, curated datapoint of a Product with a Price
    """

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Clean up the details string by removing common text that doesn't add value
        """
        details = self.details
        for remove in self.REMOVALS:
            details = details.replace(remove, "")
        return details

    def scrub(self, stuff):
        """
        Clean up the provided text by removing unnecessary characters and whitespace
        Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers
        """
        stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        stuff = stuff.replace(" ,", ",").replace(",,,",",").replace(",,",",")
        words = stuff.split(' ')
        select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)]
        return " ".join(select)
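
    # A worked trace of scrub on a hypothetical string (not from the dataset):
    #   'Widget {"model": "XJ900-PRO-2024"} [new]'
    #   -> pass 1 (regex sub):   'Widget model XJ900-PRO-2024 new'
    #   -> pass 2 (word filter): 'Widget model new'   (7+ char token with digits dropped)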

    def parse(self, data):
        """
        Parse this datapoint and if it fits within the allowed Token range,
        then set include to True
        """
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'
        if len(contents) > MIN_CHARS:
            contents = contents[:CEILING_CHARS]
            text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
            tokens = self.tokenizer.encode(text, add_special_tokens=False)
            if len(tokens) > MIN_TOKENS:
                tokens = tokens[:MAX_TOKENS]
                text = self.tokenizer.decode(tokens)
                self.make_prompt(text)
                self.include = True

    def make_prompt(self, text):
        """
        Set the prompt instance variable to be a prompt appropriate for training
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return a prompt suitable for testing, with the actual price removed
        """
        return self.prompt.split(self.PREFIX)[0] + self.PREFIX

    def __repr__(self):
        """
        Return a String version of this Item
        """
        return f"<{self.title} = ${self.price}>"
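
# A usage sketch with hypothetical data (note that the class-level tokenizer
# download requires access to the gated Llama base model on Hugging Face):
if __name__ == "__main__":
    sample = {
        "title": "USB-C Charging Cable",
        "description": ["Durable braided cable for fast charging."],
        "features": ["1m length"],
        "details": '"Batteries Included?": "No" Lightweight',
    }
    item = Item(sample, price=12.99)
    print(item)          # <USB-C Charging Cable = $12.99>
    print(item.include)  # False: this short datapoint falls below MIN_CHARS (300)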
@@ -0,0 +1,10 @@

import time
import modal
from datetime import datetime

Pricer = modal.Cls.lookup("pricer-service", "Pricer")
pricer = Pricer()
while True:
    reply = pricer.wake_up.remote()
    print(f"{datetime.now()}: {reply}")
    time.sleep(30)
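
# This loop keeps the deployed "pricer-service" container warm by calling its
# wake_up method every 30 seconds; it assumes that app has already been
# deployed to Modal.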
44
week8/community_contributions/pricer_test/pricer/llama.py
Normal file
@@ -0,0 +1,44 @@

import modal
from modal import App, Volume, Image

# Setup

app = modal.App("llama")
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate")
secrets = [modal.Secret.from_name("hf-secret")]
GPU = "T4"
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B" # "google/gemma-2-2b"

@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def generate(prompt: str) -> str:
    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quant_config,
        device_map="auto"
    )

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    return tokenizer.decode(outputs[0])
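
# To invoke this entrypoint from the repo root (modal run maps typed function
# arguments to CLI flags; the prompt text below is illustrative):
#
#   modal run pricer.llama::generate --prompt "How much does this cost to the nearest dollar?"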
75
week8/community_contributions/pricer_test/pricer/testing.py
Normal file
@@ -0,0 +1,75 @@

import math
import matplotlib.pyplot as plt

GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
RESET = "\033[0m"
COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}

class Tester:

    def __init__(self, predictor, data, title=None, size=250):
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        if error<40 or error/truth < 0.2:
            return "green"
        elif error<80 or error/truth < 0.4:
            return "orange"
        else:
            return "red"
    def run_datapoint(self, i):
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        log_error = math.log(truth+1) - math.log(guess+1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+"..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        max_error = max(self.errors)
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()
    def report(self):
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color=="green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        self.error = 0
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        cls(function, data).run()
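
# A usage sketch with a stand-in predictor and fabricated datapoints (real
# usage passes Item instances and a real prediction function):
if __name__ == "__main__":
    from types import SimpleNamespace

    data = [SimpleNamespace(title=f"Item {i}", price=float(10 + i)) for i in range(250)]

    def always_fifty(item):
        return 50.0  # trivial predictor: guesses $50 for everything

    Tester.test(always_fifty, data)  # per-item log, then a Guess-vs-Truth chart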
@@ -0,0 +1,6 @@

huggingface_hub
torch
transformers
bitsandbytes
accelerate
peft
@@ -0,0 +1,84 @@

import pdb
from pricer.ci import Pricer
from unittest.mock import patch, MagicMock
import torch
import pytest
from transformers import BitsAndBytesConfig

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
MODEL_DIR = "hf-cache/"
BASE_DIR = MODEL_DIR + BASE_MODEL
FINETUNED_DIR = MODEL_DIR + FINETUNED_MODEL

@pytest.fixture
def pricer():
    return Pricer()


def test_wake_up():
    pricer = Pricer()
    assert pricer.wake_up() == "ok"

@patch('transformers.AutoTokenizer')
@patch('peft.PeftModel')
@patch('transformers.AutoModelForCausalLM')
def test_setup(MockAutoModel, MockPeftModel, MockAutoTokenizer, pricer):
    # Setup mocks
    mock_tokenizer = MockAutoTokenizer.from_pretrained.return_value
    mock_model = MockAutoModel.from_pretrained.return_value
    mock_peft_model = MockPeftModel.from_pretrained.return_value

    # Call the setup method
    pricer.setup()

    # Assertions to ensure the setup method works correctly
    MockAutoTokenizer.from_pretrained.assert_called_once_with(BASE_DIR)
    assert pricer.tokenizer == mock_tokenizer
    assert pricer.tokenizer.pad_token == pricer.tokenizer.eos_token
    assert pricer.tokenizer.padding_side == "right"

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    MockAutoModel.from_pretrained.assert_called_once_with(
        BASE_DIR,
        quantization_config=quant_config,
        device_map="auto"
    )
    assert pricer.base_model == mock_model

    MockPeftModel.from_pretrained.assert_called_once_with(mock_model, FINETUNED_DIR, revision=REVISION)
    assert pricer.fine_tuned_model == mock_peft_model
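
# Note on decorator stacking: unittest.mock applies @patch decorators bottom-up,
# so the innermost (lowest) decorator supplies the FIRST mock argument. That is
# why test_setup receives (MockAutoModel, MockPeftModel, MockAutoTokenizer)
# while its decorators read top-down in the reverse order.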

@patch('transformers.AutoTokenizer')
@patch('peft.PeftModel')
def test_price(MockPeftModel, MockAutoTokenizer, pricer):
    # Setup mocks
    mock_tokenizer = MockAutoTokenizer.return_value
    mock_tokenizer.encode.return_value = torch.tensor([[1, 2, 3]])
    mock_tokenizer.decode.return_value = "Price is $123.45"

    mock_model = MockPeftModel.return_value
    mock_model.generate.return_value = torch.tensor([[1, 2, 3, 4, 5]])

    # Assign mocks to the pricer instance
    pricer.tokenizer = mock_tokenizer
    pricer.fine_tuned_model = mock_model

    # Call the method
    description = "Test description"
    result = pricer.price(description)

    # Assert the result
    assert result == 123.45