Fixed modal issue

Edward Donner
2025-04-30 15:33:27 -04:00
parent 0360cc4d75
commit 17f797c024
2 changed files with 35 additions and 41 deletions

View File

@@ -322,15 +322,14 @@
"\n",
"## A way to improve the speed of the Modal pricer service\n",
"\n",
"A student mentioned to me that he was concerned by how slow Modal seems to be. The reason is that Modal puts our service to sleep if we don't use it, and then it takes 2.5 minutes to spin back up.\n",
"The first time you run this modal class, it might take as much as 10 minutes to build. \n",
"Subsequently it should be much faster.. 30 seconds if it needs to wake up, otherwise 2 seconds. \n",
"If you want it to always be 2 seconds, you can keep the container from going to sleep by editing this constant in pricer_service2.py:\n",
"\n",
"I've added a utility called `keep_warm.py` that will keep our Modal warm by pinging it every 30 seconds.\n",
"`MIN_CONTAINERS = 0`\n",
"\n",
"To use the utliity, bring up a new Terminal (Mac) or Anaconda prompt (Windows), ensure the environment is activated with `conda activate llms`\n",
"\n",
"Then run: `python keep_warm.py` from within the week8 drectory.\n",
"\n",
"Remember to press ctrl+C or exit the window when you no longer need Modal running.\n"
"Make it 1 to keep a container alive. \n",
"But please note: this will eat up credits! Only do this if you are comfortable to have a process running continually.\n"
]
},
{
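For context on the new text above: whether Modal keeps a container resident is controlled by the min_containers argument to @app.cls (recent modal clients renamed the older keep_warm parameter). A minimal sketch of the idea, separate from the pricer code, with illustrative names:

import modal

app = modal.App("warm-demo")  # hypothetical app name, for illustration only

# min_containers=1 keeps one container resident so calls skip the cold start;
# 0 (the default) lets Modal scale to zero and cold-start on demand.
@app.cls(image=modal.Image.debian_slim(), min_containers=1)
class Demo:
    @modal.method()
    def ping(self) -> str:
        return "ok"

As the notebook warns, a resident container is billed continuously, so 0 is the sensible default for coursework.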

View File

@@ -1,22 +1,13 @@
 import modal
-from pathlib import PurePosixPath
+from modal import App, Volume, Image
 
 # Setup - define our infrastructure with code!
 
 app = modal.App("pricer-service")
-secrets = [modal.Secret.from_name("huggingface-secret")]
-image = modal.Image.debian_slim().pip_install(
-    "huggingface", "torch", "transformers", "bitsandbytes",
-    "accelerate", "peft", "huggingface_hub[hf_transfer]"
-).env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
-
-# This is where we cache model files to avoid redownloading each time a container is started
-hf_cache_vol = modal.Volume.from_name("hf-cache", create_if_missing=True)
-
+image = Image.debian_slim().pip_install("huggingface", "torch", "transformers", "bitsandbytes", "accelerate", "peft")
+secrets = [modal.Secret.from_name("hf-secret")]
 
 # Constants
 
 GPU = "T4"
-# Keep N containers active to avoid cold starts
-MIN_CONTAINERS = 0
 BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
 PROJECT_NAME = "pricer"
 HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
@@ -24,28 +15,28 @@ RUN_NAME = "2024-09-13_13.04.39"
 PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
 REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
 FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
-
-# Mount for cache location
-MODEL_DIR = PurePosixPath("/models")
-BASE_DIR = MODEL_DIR / BASE_MODEL
-FINETUNED_DIR = MODEL_DIR / FINETUNED_MODEL
+CACHE_DIR = "/cache"
 
 QUESTION = "How much does this cost to the nearest dollar?"
 PREFIX = "Price is $"
 
-@app.cls(image=image, secrets=secrets, gpu=GPU, timeout=1800, min_containers=MIN_CONTAINERS, volumes={MODEL_DIR: hf_cache_vol})
+hf_cache_volume = Volume.from_name("hf-hub-cache", create_if_missing=True)
+
+@app.cls(
+    image=image.env({"HF_HUB_CACHE": CACHE_DIR}),
+    secrets=secrets,
+    gpu=GPU,
+    timeout=1800,
+    volumes={CACHE_DIR: hf_cache_volume}
+)
 class Pricer:
     @modal.enter()
     def setup(self):
         import torch
-        from huggingface_hub import snapshot_download
-        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
         from peft import PeftModel
-
-        # Download and cache model files to the volume
-        snapshot_download(BASE_MODEL, local_dir=BASE_DIR)
-        snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_DIR)
 
         # Quant Config
         quant_config = BitsAndBytesConfig(
             load_in_4bit=True,
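The replacement pattern in this hunk — pointing HF_HUB_CACHE at a directory backed by a named Volume — is what lets warm restarts skip the model download: huggingface_hub consults HF_HUB_CACHE before fetching, and the Volume persists across containers. A stripped-down sketch of the same mechanism, with illustrative names:

import modal

app = modal.App("hf-cache-demo")  # illustrative app name

# A named, persisted volume; create_if_missing provisions it on first use.
cache_vol = modal.Volume.from_name("hf-hub-cache-demo", create_if_missing=True)

# huggingface_hub reads HF_HUB_CACHE to decide where to store downloads.
image = modal.Image.debian_slim().pip_install("huggingface_hub").env({"HF_HUB_CACHE": "/cache"})

@app.function(image=image, volumes={"/cache": cache_vol})
def prefetch(repo_id: str) -> None:
    from huggingface_hub import snapshot_download
    # The first call downloads into /cache (the volume); later containers
    # mount the same volume and find the files already present.
    snapshot_download(repo_id)
    cache_vol.commit()  # flush writes so other containers see them (recent clients also commit on exit)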
@@ -55,22 +46,23 @@ class Pricer:
         )
 
         # Load model and tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(BASE_DIR)
+        self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         self.tokenizer.padding_side = "right"
         self.base_model = AutoModelForCausalLM.from_pretrained(
-            BASE_DIR,
+            BASE_MODEL,
             quantization_config=quant_config,
             device_map="auto"
         )
-        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_DIR, revision=REVISION)
+        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_MODEL, revision=REVISION)
 
     @modal.method()
     def price(self, description: str) -> float:
-        import re, torch
-        from transformers import set_seed
+        import os
+        import re
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
+        from peft import PeftModel
 
         set_seed(42)
         prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
@@ -84,3 +76,6 @@ class Pricer:
         match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
         return float(match.group()) if match else 0
 
+    @modal.method()
+    def wake_up(self) -> str:
+        return "ok"