Unit test harness started

week8/community_contributions/pricer_test/README.md (new file, 47 lines)
@@ -0,0 +1,47 @@
# Run Continuous Integration (CI) Tests on Modal

Note!

The Hugging Face secret in Modal is named "huggingface-secret". Please change the name in `pricer/ci.py` if your secret is called something else.
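If the secret does not exist yet, one way to create it is from the Modal CLI. The command below is a sketch: it assumes your Hugging Face token is at hand and that `HF_TOKEN` is the key name your code reads (check `modal secret --help` for your Modal version):

```bash
modal secret create huggingface-secret HF_TOKEN=hf_xxx
```
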
## Test Modal deployment

You can deploy `pricer.ci` to Modal:

```bash
modal deploy -m pricer.ci
```

Then, from a Python session:

```python
import modal

Pricer = modal.Cls.lookup("pricer-ci-testing", "Pricer")
pricer = Pricer()
reply = pricer.price.remote("Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio")
print(reply)
```

## Unit testing

The unit test strategy follows [this example repo](https://github.com/modal-labs/ci-on-modal).

## Usage

All commands below are run from the root of the repository (this directory).

### Run tests remotely on Modal

```bash
modal run pricer.ci
```

On the first execution, the [container image](https://modal.com/docs/guide/custom-container)
for your application will be built.

This image will be cached on Modal and only rebuilt if one of its dependencies,
like the `requirements.txt` file, changes.

### Debug tests running remotely

To debug the tests, you can open a shell
in the exact same environment that the tests are run in:

```bash
modal shell pricer.ci
```

_Note_: On the Modal worker, the `pytest` command is run from the home directory, `/root`,
which contains the `tests` folder, but the `modal shell` command will
drop you at the top of the filesystem, `/`.
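
For example, to reproduce the CI invocation from inside that shell session (`pricer/ci.py` runs `pytest -vs` with `cwd="/root"`):

```bash
cd /root
pytest -vs
```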

week8/community_contributions/pricer_test/pricer/ci.py (new file, 103 lines)
@@ -0,0 +1,103 @@
from pathlib import Path

import modal

ROOT_PATH = Path(__file__).parent.parent

image = (
    modal.Image.debian_slim()
    .pip_install("pytest")
    .pip_install_from_requirements(ROOT_PATH / "requirements.txt")
)

app = modal.App("pricer-ci-testing", image=image)

# Mount: add the local test files to the remote container
tests = modal.Mount.from_local_dir(ROOT_PATH / "tests", remote_path="/root/tests")

@app.function(gpu="any", mounts=[tests])
def pytest():
    import subprocess

    # Run the mounted test suite from /root, where the Mount placed it
    subprocess.run(["pytest", "-vs"], check=True, cwd="/root")


secrets = [modal.Secret.from_name("huggingface-secret")]

# Constants

GPU = "T4"
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner"  # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
MODEL_DIR = "hf-cache/"
BASE_DIR = MODEL_DIR + BASE_MODEL
FINETUNED_DIR = MODEL_DIR + FINETUNED_MODEL

QUESTION = "How much does this cost to the nearest dollar?"
PREFIX = "Price is $"


@app.cls(image=image, secrets=secrets, gpu=GPU, timeout=1800)
class Pricer:

    @modal.build()
    def download_model_to_folder(self):
        # Runs at image build time, so the weights are baked into the image
        import os
        from huggingface_hub import snapshot_download

        os.makedirs(MODEL_DIR, exist_ok=True)
        snapshot_download(BASE_MODEL, local_dir=BASE_DIR)
        snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_DIR)

    @modal.enter()
    def setup(self):
        # Runs once per container start: load the quantized base model plus the fine-tuned adapter
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import PeftModel

        # Quant config: 4-bit NF4 quantization so the 8B model fits on a T4
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )

        # Load model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(BASE_DIR)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"

        self.base_model = AutoModelForCausalLM.from_pretrained(
            BASE_DIR,
            quantization_config=quant_config,
            device_map="auto"
        )

        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_DIR, revision=REVISION)

    @modal.method()
    def price(self, description: str) -> float:
        import re
        import torch
        from transformers import set_seed

        set_seed(42)
        prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        attention_mask = torch.ones(inputs.shape, device="cuda")
        outputs = self.fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
        result = self.tokenizer.decode(outputs[0])

        # Parse the first number that follows the "Price is $" prefix
        contents = result.split(PREFIX)[1]
        contents = contents.replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
        return float(match.group()) if match else 0

    @modal.method()
    def wake_up(self) -> str:
        return "ok"

week8/community_contributions/pricer_test/pricer/items.py (new file, 101 lines)
@@ -0,0 +1,101 @@
from typing import Optional
from transformers import AutoTokenizer
import re

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
MIN_TOKENS = 150
MAX_TOKENS = 160
MIN_CHARS = 300
CEILING_CHARS = MAX_TOKENS * 7


class Item:
    """
    An Item is a cleaned, curated datapoint of a Product with a Price
    """

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Clean up the details string by removing common text that doesn't add value
        """
        details = self.details
        for remove in self.REMOVALS:
            details = details.replace(remove, "")
        return details

    def scrub(self, stuff):
        """
        Clean up the provided text by removing unnecessary characters and whitespace
        Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers
        """
        stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        stuff = stuff.replace(" ,", ",").replace(",,,", ",").replace(",,", ",")
        words = stuff.split(' ')
        select = [word for word in words if len(word) < 7 or not any(char.isdigit() for char in word)]
        return " ".join(select)

    def parse(self, data):
        """
        Parse this datapoint and if it fits within the allowed Token range,
        then set include to True
        """
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'
        if len(contents) > MIN_CHARS:
            contents = contents[:CEILING_CHARS]
            text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
            tokens = self.tokenizer.encode(text, add_special_tokens=False)
            if len(tokens) > MIN_TOKENS:
                tokens = tokens[:MAX_TOKENS]
                text = self.tokenizer.decode(tokens)
                self.make_prompt(text)
                self.include = True

    def make_prompt(self, text):
        """
        Set the prompt instance variable to be a prompt appropriate for training
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return a prompt suitable for testing, with the actual price removed
        """
        return self.prompt.split(self.PREFIX)[0] + self.PREFIX

    def __repr__(self):
        """
        Return a String version of this Item
        """
        return f"<{self.title} = ${self.price}>"
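
For reference, a sketch of constructing an `Item`; the dictionary shape matches what `parse` reads, but the values are invented for illustration:

```python
# Hypothetical datapoint in the shape Item.parse expects
data = {
    "title": "Quadcast HyperX condenser mic",
    "description": ["Connects via usb-c to your computer for crystal clear audio"],
    "features": ["USB-C connectivity"],
    "details": '"Batteries Required?": "No"',
}
item = Item(data, price=99.99)
print(item)          # <Quadcast HyperX condenser mic = $99.99>
print(item.include)  # True only if the cleaned text exceeded MIN_CHARS / MIN_TOKENS
```
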
@@ -0,0 +1,10 @@
import time
import modal
from datetime import datetime

# Ping the deployed pricer service every 30 seconds so its container stays warm
Pricer = modal.Cls.lookup("pricer-service", "Pricer")
pricer = Pricer()
while True:
    reply = pricer.wake_up.remote()
    print(f"{datetime.now()}: {reply}")
    time.sleep(30)
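
Assuming this snippet is saved locally as, say, `keep_warm.py` (the filename is not shown above), it can be left running in a terminal to keep the deployed service responsive:

```bash
python keep_warm.py
```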

week8/community_contributions/pricer_test/pricer/llama.py (new file, 44 lines)
@@ -0,0 +1,44 @@
import modal
from modal import Image

# Setup

app = modal.App("llama")
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate")
secrets = [modal.Secret.from_name("hf-secret")]
GPU = "T4"
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B"  # "google/gemma-2-2b"


@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def generate(prompt: str) -> str:
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed

    # Quant config: 4-bit NF4 quantization
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quant_config,
        device_map="auto"
    )

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    return tokenizer.decode(outputs[0])
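
As a sketch of how one might invoke this function from the command line (the entrypoint below is not part of the commit; `@app.local_entrypoint()` is the standard Modal pattern for `modal run`):

```python
# Hypothetical driver appended to llama.py; run with: modal run -m pricer.llama
@app.local_entrypoint()
def main():
    prompt = "How much does this cost to the nearest dollar?\n\nUSB-C cable\n\nPrice is $"
    print(generate.remote(prompt))
```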

week8/community_contributions/pricer_test/pricer/testing.py (new file, 75 lines)
@@ -0,0 +1,75 @@
import math
import matplotlib.pyplot as plt

GREEN = "\033[92m"
YELLOW = "\033[93m"
RED = "\033[91m"
RESET = "\033[0m"
COLOR_MAP = {"red": RED, "orange": YELLOW, "green": GREEN}


class Tester:

    def __init__(self, predictor, data, title=None, size=250):
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        if error < 40 or error / truth < 0.2:
            return "green"
        elif error < 80 or error / truth < 0.4:
            return "orange"
        else:
            return "red"

    def run_datapoint(self, i):
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        log_error = math.log(truth + 1) - math.log(guess + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        cls(function, data).run()
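
For context, a sketch of how `Tester` is meant to be driven; the predictor and dataset names below are illustrative, not from this commit:

```python
# Hypothetical usage: `items` is a list of Item objects (see items.py)
def guess_price(item):
    return 50.0  # stand-in predictor; a real one would call a model

# Scores the first 250 datapoints by default, prints per-item results,
# then charts guesses against ground truth
Tester.test(guess_price, items)
```
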
@@ -0,0 +1,6 @@
huggingface
torch
transformers
bitsandbytes
accelerate
peft
@@ -0,0 +1,5 @@
import torch


def test_torch_cuda():
    # Check that the Modal worker exposes a GPU to PyTorch
    # (the upstream example's my_pkg.lib.has_gpu is not installed in this image)
    assert torch.cuda.is_available()
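
A natural extension (a sketch, not part of the commit) is a smoke test that the CI image carries the inference stack listed in requirements.txt:

```python
def test_inference_stack_importable():
    # The CI image should provide everything pricer/ci.py imports at runtime
    import torch  # noqa: F401
    import transformers  # noqa: F401
    import peft  # noqa: F401
```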