diff --git a/week1/community-contributions/day2_grocery_list_generator_with_recipe_scaler.ipynb b/week1/community-contributions/day2_grocery_list_generator_with_recipe_scaler.ipynb new file mode 100644 index 0000000..8b2e731 --- /dev/null +++ b/week1/community-contributions/day2_grocery_list_generator_with_recipe_scaler.ipynb @@ -0,0 +1,266 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from IPython.display import Markdown, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv()\n", + "api_key = os.getenv('OPENAI_API_KEY')\n", + "\n", + "# Check the key\n", + "\n", + "if not api_key:\n", + " print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n", + "elif not api_key.startswith(\"sk-proj-\"):\n", + " print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n", + "elif api_key.strip() != api_key:\n", + " print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n", + "else:\n", + " print(\"API key found and looks good so far!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's just make sure the model is loaded\n", + "!ollama pull llama3.2\n", + "import ollama\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# System prompt - defines the AI's behavior\n", + "SYSTEM_PROMPT = \"\"\"You are a helpful cooking assistant that provides ingredient lists for recipes.\n", + "Format your response as clean markdown with this structure:\n", + "\n", + "# [Dish Name]\n", + "**Serves:** [number] people \n", + "**Cook Time:** [estimated time]\n", + "\n", + "## Shopping List\n", + "- [ ] [amount] [unit] [ingredient]\n", + "- [ ] [amount] [unit] [ingredient]\n", + "\n", + "Guidelines:\n", + "- Use common grocery store measurements (cups, lbs, oz, pieces, cans, etc.)\n", + "- Round to practical shopping amounts (1.5 lbs instead of 1.47 lbs)\n", + "- Group similar items when logical (all spices together)\n", + "- Include pantry staples only if they're essential (salt, oil, etc.)\n", + "- Assume basic seasonings are available unless recipe-specific\n", + "- For produce, specify size when important (large onion, medium tomatoes)\n", + "- Keep optional items at the end of similar item groups or end of the list\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_recipe_openai(dish_name: str, num_people: int):\n", + " \"\"\"Get scaled recipe ingredients using system and user prompts\"\"\"\n", + "\n", + " user_prompt = f\"Give me the ingredients needed to make {dish_name} for {num_people} people.\"\n", + " \n", + " try:\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": 
SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ],\n", + " max_tokens=400\n", + " )\n", + " \n", + " return response.choices[0].message.content\n", + " \n", + " except Exception as e:\n", + " return f\"❌ Error: Failed to get recipe - {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "OLLAMA_MODEL = \"llama3.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_recipe_ollama(dish_name: str, num_people: int):\n", + " \"\"\"Get recipe using Ollama API\"\"\"\n", + " user_prompt = f\"Give me the ingredients needed to make {dish_name} for {num_people} people.\"\n", + " \n", + " messages = [\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + " \n", + " try:\n", + " response = ollama.chat(model=OLLAMA_MODEL, messages=messages)\n", + " return response['message']['content']\n", + " except Exception as e:\n", + " return f\"❌ Ollama Error: {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def print_shopping_list(recipe_markdown):\n", + " \"\"\"Print the markdown response\"\"\"\n", + " display(Markdown(recipe_markdown))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"🍳 Recipe Scaler & Grocery List Maker\")\n", + "print(\"=\" * 40)\n", + " \n", + "ai_service_choice = input(\"\\nChoose AI service (1 for OpenAI, 2 for Ollama): \").strip()\n", + "\n", + "dish = input(\"What dish do you want to make? \")\n", + "num_people = int(input(\"How many people? 
\"))\n", + " \n", + "print(f\"\\n🔍 Getting recipe for {dish}...\")\n", + " \n", + "# Get and display recipe\n", + "if ai_service_choice == '1':\n", + " print(\"Using OpenAI API...\")\n", + " recipe_markdown = get_recipe_openai(dish, num_people)\n", + "else:\n", + " print(\"Using Ollama (local)...\")\n", + " recipe_markdown = get_recipe_ollama(dish, num_people)\n", + "\n", + "print_shopping_list(recipe_markdown)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/week1 EXERCISE_AI_techician.ipynb b/week1/community-contributions/week1 EXERCISE_AI_techician.ipynb index 7824df8..130de91 100644 --- a/week1/community-contributions/week1 EXERCISE_AI_techician.ipynb +++ b/week1/community-contributions/week1 EXERCISE_AI_techician.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "id": "0", "metadata": {}, "source": [ "# End of week 1 exercise\n", @@ -13,22 +13,30 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "c1070317-3ed9-4659-abe3-828943230e03", + "execution_count": null, + "id": "1", "metadata": {}, "outputs": [], "source": [ "# imports\n", "from IPython.display import Markdown, display, update_display\n", + "from dotenv import load_dotenv\n", + "import os\n", "import openai\n", "from openai import OpenAI\n" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", - "metadata": {}, + "execution_count": null, 
+ "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "# constants\n", @@ -37,6 +45,9 @@ " 'MODEL_LLAMA': 'llama3.2'\n", "}\n", "\n", + "load_dotenv(override=True)\n", + "api_key = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", "# To use ollama using openai API (ensure that ollama is running on localhost)\n", "ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", "\n", @@ -57,9 +68,15 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "a8d7923c-5f28-4c30-8556-342d7c8497c1", - "metadata": {}, + "execution_count": null, + "id": "3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "# set up environment\n", @@ -89,8 +106,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "3f0d0137-52b0-47a8-81a8-11a90a010798", + "execution_count": null, + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -105,67 +122,9 @@ { "cell_type": "code", "execution_count": null, - "id": "60ce7000-a4a5-4cce-a261-e75ef45063b4", + "id": "5", "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "**Understanding the Code Snippet**\n", - "\n", - "This Python code snippet uses a combination of built-in functions, dictionary iteration, and generator expressions to extract and yield author names from a list of `Book` objects.\n", - "\n", - "Here's a breakdown:\n", - "\n", - "1. **Dictionary Iteration**: The expression `for book in books if book.get(\"author\")`\n", - " - Iterates over each element (`book`) in the container `books`.\n", - " - Filters out elements whose `'author'` key does not have a value (i.e., `None`, `False`, or an empty string). This leaves only dictionaries with author information.\n", - "\n", - "2. 
**Dictionary Access**: The expression `{book.get(\"author\") for book in books if book.get(\"author\")}`\n", - " - Uses dictionary membership testing to access only the values associated with the `'author'` key.\n", - " - If the value is not found or is considered false, it's skipped in this particular case.\n", - "\n", - "3. **Generator Expression**: This generates an iterator that iterates over the filtered author names.\n", - " - Yields each author name (i.e., a single `'name'` from the book dictionary) on demand.\n", - " - Since these are generator expressions, they use memory less than equivalent Python lists and also create results on-demand.\n", - "\n", - "4. **`yield from`**: This statement takes the generator expression as an argument and uses it to generate a nested iterator structure.\n", - " - It essentially \"decompresses\" the single level of nested iterator created by `list(iter(x))`, allowing for simpler use cases and potentially significant efficiency improvements for more complex structures where every value must be iterated, while in the latter case just the first item per iterable in the outer expression's sequence needs to actually be yielded into result stream.\n", - " - By \"yielding\" a nested iterator (the generator expression), we can simplify code by avoiding repetitive structure like `for book, book_author in zip(iterating over), ...` or list creation.\n", - "\n", - "**Example Use Case**\n", - "\n", - "In this hypothetical example:\n", - "\n", - "# Example Book objects\n", - "class Book:\n", - " def __init__(self, author, title):\n", - " self.author = author # str\n", - " self.title = title\n", - "\n", - "books = [\n", - " {\"author\": \"John Doe\", \"title\": f\"Book 1 by John Doe\"},\n", - " {\"author\": None, \"title\": f\"Book 2 without Author\"},\n", - " {\"author\": \"Jane Smith\", \"title\": f\"Book 3 by Jane Smith\"}\n", - "]\n", - "\n", - "# The given expression to extract and yield author names\n", - "for author in yield from 
{book.get(\"author\") for book in books if book.get(\"author\")}:\n", - "\n", - " print(author) \n", - "\n", - "In this code snippet, printing the extracted authors would output `John Doe`, `Jane Smith` (since only dictionaries with author information pass the filtering test).\n", - "\n", - "Please modify it like as you wish and use `yield from` along with dictionary iteration, list comprehension or generator expression if needed, and explain what purpose your version has." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Get the model of your choice (choices appeared below) to answer, with streaming \n", "\n", @@ -174,13 +133,21 @@ " 'MODEL_LLAMA': 'llama3.2'\n", "}\"\"\"\n", "\n", - "stream_brochure(question,'MODEL_LLAMA')" + "stream_brochure(question,'MODEL_GPT')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "llms", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -194,7 +161,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/week1/community-contributions/week1-jedi-master.py b/week1/community-contributions/week1-jedi-master.py new file mode 100644 index 0000000..c59dc32 --- /dev/null +++ b/week1/community-contributions/week1-jedi-master.py @@ -0,0 +1,64 @@ +#!/usr/bin/python3 + +import os +import argparse +from dotenv import load_dotenv +from openai import OpenAI +from IPython.display import Markdown, display, update_display + +def load_openai_key(): + # Load environment variables in a file called .env + load_dotenv(override=True) + api_key = os.getenv('OPENAI_API_KEY') + + # Check the key + if not api_key: + return "Error: No API key was found!" 
+ elif not api_key.startswith("sk-proj-"): + return "Error: An API key was found, but it doesn't start sk-proj-; please check you're using the right key" + elif api_key.strip() != api_key: + return "Error: An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them!" + else: + return "API key found and looks good so far!" + +def ask_llm(client, model, user_prompt): + system_prompt = """ + You are a wise Jedi Master and an excellent teacher. + You will answer any question you are given by breaking it down into small steps + that even a complete beginner will understand. + When answering, speak as if you are Yoda from the Star Wars universe. + Also, refer to the user as "My young Padawan" + End every answer with "May the force be with you, always." + """ + response = client.chat.completions.create( + model = model, + messages = [ {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}] + ) + return response.choices[0].message.content + +def main(): + parser = argparse.ArgumentParser(description="JedAI Master instructor") + parser.add_argument("provider", choices=["openai", "ollama"], help="AI provider to use") + parser.add_argument("--model", help="Model to use for Ollama (required if provider is 'ollama')", required="ollama" in parser.parse_known_args()[0].provider) + parser.add_argument("question", help="What knowledge do you seek, my young Padawan?") + + args = parser.parse_args() + + if args.provider == "openai": + load_openai_key() + client = OpenAI() + model = "gpt-4o-mini" + elif args.provider == "ollama": + client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama') + model = args.model + else: + return "Error: invalid provider!" 
+ + user_prompt = args.question + + result = ask_llm(client, model, user_prompt) + print("AI Response:", result) + +if __name__ == "__main__": + main() diff --git a/week3/community-contributions/06_meeting_minute_assistant.ipynb b/week3/community-contributions/06_meeting_minute_assistant.ipynb new file mode 100644 index 0000000..ac2fbc0 --- /dev/null +++ b/week3/community-contributions/06_meeting_minute_assistant.ipynb @@ -0,0 +1,450 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "HFOR8SGHPyj3" + }, + "source": [ + "# Meeting Minutes Generator (STT with LLMs)\n", + "---\n", + "\n", + "- 🌍 Task: Generate structured meeting minutes from audio recordings using Speech-to-Text (STT) and Large Language Models\n", + "- 🧠 Models:\n", + " - AUDIO_MODEL: whisper-1\n", + " - LLM_MODEL: meta-llama/Meta-Llama-3.1-8B-Instruct\n", + "- 🚀 Tools: Python, Gradio UI, OpenAI / HuggingFace APIs\n", + "- 📤 Output: Structured meeting minutes in Markdown format with real-time streaming\n", + "- 🧑‍💻 Skill Level: Intermediate\n", + "\n", + "🎯 How It Works\n", + "- 1️⃣ Upload a .mp3 meeting recording\n", + "- 2️⃣ Submit the audio to generate meeting minutes in text format\n", + "\n", + "You can download some meetings from this link to test the code:\n", + "[https://www.rmofspringfield.ca/p/meeting-audio-files](https://www.rmofspringfield.ca/p/meeting-audio-files)\n", + "\n", + "\n", + "🛠️ Requirements\n", + "- ⚙️ Hardware: ✅ GPU required (model download); Google Colab recommended (T4)\n", + "- 🔑 OpenAI API Key (used for whisper-1 transcription)\n", + "- 🔑 Hugging Face Token (for the LLM model)\n", + "\n", + "⚙️ Customizable by user\n", + "- 🤖 Selected model: AUDIO_MODEL / LLM_MODEL\n", + "- 📜 system_prompt: Controls model behavior (concise, accurate, structured output)\n", + "- 💬 user_prompt\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A_osPeBQNAdv", + "outputId": "11cc73e0-9aad-4f57-e1ae-2d71c4eb0444" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2 gradio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pL-8yTOlQiOH" + }, + "outputs": [], + "source": [ + "# imports\n", + "import torch\n", + "import threading\n", + "from openai import OpenAI\n", + "from huggingface_hub import login\n", + "from google.colab import userdata\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "AUDIO_MODEL = \"whisper-1\" # OpenAI Whisper API model\n", + "LLM_MODEL = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "62c2Wbt3P5Ew" + }, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "openai_api_key = userdata.get('OPENAI_API_KEY')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "login(hf_token, add_to_git_credential=True)\n", + "openai = OpenAI(api_key=openai_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "smyocqu_P6yg" + }, + "outputs": [], + "source": [ + "class MeetingAssistant:\n", + " def __init__(self, model_name=LLM_MODEL, audio_model=AUDIO_MODEL):\n", + "\n", + " # Load tokenizer and llm model\n", + " quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " 
bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + " )\n", + "\n", + " self.audio_model = audio_model\n", + " self.tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + " self.model = AutoModelForCausalLM.from_pretrained(\n", + " model_name,\n", + " device_map=\"auto\",\n", + " quantization_config=quant_config\n", + " )\n", + "\n", + " def transcribe_audio(self, audio_path, progress):\n", + " \"\"\"Transcribes the uploaded audio file using OpenAI Whisper API.\"\"\"\n", + "\n", + " progress(0.3, desc=\"Transcribing audio...\")\n", + "\n", + " try:\n", + " with open(audio_path, \"rb\") as audio_file:\n", + " transcription = openai.audio.transcriptions.create(\n", + " model=self.audio_model,\n", + " file=audio_file,\n", + " response_format=\"text\"\n", + " )\n", + " return transcription\n", + " except Exception as e:\n", + " return f\"Error during transcription: {str(e)}\"\n", + "\n", + " def generate_minutes(self, transcription, progress):\n", + " \"\"\"Generates meeting minutes from the transcript using the Llama model.\"\"\"\n", + " progress(0.6, desc=\"Generating meeting minutes...\")\n", + "\n", + " system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n", + " user_prompt = f\"Below is an extract transcript of a meeting. 
Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ]\n", + "\n", + " inputs = self.tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + " streamer = TextIteratorStreamer(self.tokenizer)\n", + "\n", + " thread = threading.Thread(\n", + " target=self.model.generate, kwargs={\n", + " \"input_ids\": inputs,\n", + " \"max_new_tokens\": 2000,\n", + " \"streamer\": streamer\n", + " })\n", + " thread.start()\n", + "\n", + "\n", + " started = False\n", + " # buffer = \"\"\n", + " for new_text in streamer:\n", + " if not started:\n", + " if \"<|start_header_id|>assistant<|end_header_id|>\" in new_text:\n", + " started = True\n", + " new_text = new_text.split(\"<|start_header_id|>assistant<|end_header_id|>\")[-1].strip()\n", + "\n", + " if started:\n", + " if \"<|eot_id|>\" in new_text:\n", + " new_text = new_text.replace(\"<|eot_id|>\", \"\") # Remove the unwanted token\n", + "\n", + " if new_text.strip(): # Only yield non-empty chunks\n", + " yield new_text\n", + "\n", + " def process_meeting(self, audio_file, progress):\n", + " \"\"\"Handles the complete process: transcribes audio and generates minutes.\"\"\"\n", + " progress(0.1, desc=\"Processing audio file...\")\n", + "\n", + " # Check if a file is uploaded\n", + " if audio_file is None:\n", + " return \"Please upload an audio file.\"\n", + "\n", + " try:\n", + " # Check file format\n", + " if not str(audio_file).lower().endswith('.mp3'):\n", + " return \"Please upload an MP3 file.\"\n", + "\n", + " # Get transcription\n", + " transcription = self.transcribe_audio(audio_file, progress)\n", + "\n", + " # Generate minutes\n", + " accumulated_text = \"\"\n", + " minutes = self.generate_minutes(transcription, progress)\n", + " 
for chunk in minutes:\n", + " accumulated_text += chunk # Append new text\n", + " yield accumulated_text # Update Gradio output with full text\n", + "\n", + " except Exception as e:\n", + " return f\"Error processing file: {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fyMu9JrBRBGI" + }, + "outputs": [], + "source": [ + "class GradioInterface:\n", + " def __init__(self):\n", + " \"\"\"Initializes the Gradio interface for processing audio files.\"\"\"\n", + " self.assistant = MeetingAssistant()\n", + " self.iface = gr.Interface(\n", + " fn=self.process_audio,\n", + " inputs=gr.Audio(type=\"filepath\", label=\"Upload MP3 File\", format=\"mp3\"),\n", + " outputs=gr.Markdown(label=\"Meeting Minutes\", min_height=60),\n", + " title=\"AI Meeting Assistant\",\n", + " description=\"Upload an audio file to transcribe and generate meeting minutes.\",\n", + " flagging_mode=\"never\"\n", + " )\n", + "\n", + " def process_audio(self, audio_file, progress=gr.Progress()): # Adapter between the UI and the backend.\n", + " \"\"\"Handles user input from Gradio, processes the audio, and returns meeting minutes.\"\"\"\n", + " response = self.assistant.process_meeting(audio_file, progress)\n", + " for chunk in response:\n", + " yield chunk\n", + "\n", + " def launch(self):\n", + " \"\"\"Launches the Gradio interface.\"\"\"\n", + " self.iface.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "0f705a9046d34fb0a7ab8177a6521b88", + "28e61ba2b56f4d3484dd3ec0eecb12aa", + "ecb3207e61d44ca39c7f8a97546c6686", + "5a4f5291a0b24178a44b1b3c2401a957", + "2472dbee01b149c0ac7efe6eaa5ffd66", + "65c6998bdf7444de85b37468f6b6f42e", + "4323c3fbb5d24b38a920313479bb5c57", + "8af3f0faa5b144efbf1aa443c2839d2c", + "e0f3096904354279a95175d116816262", + "0e6ad8796e9e4b868c8507722c9cbf33", + 
"a32fde87feaf42199de6efe2c94085aa", + "e6c97a25f41044ab89e49d9bf9836de4", + "78562aef2a6a422dba1145306b294823", + "980453816461473fab04be9b6fbf03b5", + "39ce806000744113ae35a617adfe2571", + "2991097320c148b7b0eb81e3ce866df2", + "1a502c41fc3044c2a7c24ad144c209e9", + "91a907a6aa044a3288cbf4deca77eb67", + "7f78522bd56847bfa740ff8146e726d2", + "eb2129af41b24e2ea64a962cb041164c", + "44bdcc01d31a43eeaa7139018c24a83b", + "a21477f95f604f618a3aa2f48c00f7f3", + "0b97c1be64664458abfc0109857d86eb", + "2d0a0c7b89a64b7499ce87e81044d461", + "19acf6f364d8478ea264dde4fd4a1ca1", + "c084a0f7a7c04c90a0fed54c60cc8e79", + "482baf34221048bd8bb4e57cebe44707", + "f9c94568e6b342dda23ccd3be906eec0", + "c6f719622eae45b0b110b377918c2eb2", + "83e68bf6b3994fd5a6eea4ba722864c3", + "01d3ea10affb447ead36c6b4476e7a4c", + "5f93b389541e4ec09aced45d018bc8c1", + "90b2cfcc49804e78b6bebb383e9e6893", + "6f1e02e1c1da4bd9a6d13b3907cd78ae", + "39ea33000ee741c2b9fdf518f657d872", + "1b6204edaebf489e9d3e70f6d722c33a", + "4ae96d4297b84fd1a9022a9c07f7987e", + "2c43cc66619945a18a82cd9437ea60be", + "9e1dc2cc46fa4a4ea6c5d3a50333a02f", + "7c11259f23a6440babc156ac7d4b94c7", + "48c2c5afef3d47e3b9bada3cdc339ec9", + "a0ca5ccd08df4b9191a6400907f239fa", + "72a5d5d5f42e44f197b5829801fee49c", + "37cefb5abc424fca84d5ec4d7b90ff1f", + "afce35f6ca0545d99937a2fca8030cb0", + "fab983c8f0d544a2950d03acd5c39644", + "f1a00e2402d2498292cbc5b767b1b3a9", + "ca4db027b9764a8180617aae1b215f60", + "d96a8910fbc5451083df650386ce6726", + "5bd8e043fbd64c7c9dfb0d871737786d", + "1ca523532aa5433c91df9cb53291ba29", + "0b5340eb370a490ea946a446a9ab2eaa", + "5f822db8ca764ce4b8dd7b99a83c7286", + "37622f8dcbe14ac5ad80c9b09c8c4005", + "1a73f8a262a94cd48d0370bbfd582405", + "99bfb965add64f609f0ef008c443cdb9", + "f1afbbe6e1fa4239af3d79b42f1ffc26", + "2574026b82a040f089bfd202db5ef91d", + "32bc3c434f824c618659693de6bd929a", + "6d19a5bd166443b1acbf261287be09ac", + "58e73b94784645f699f957693aaf6e6e", + "4e5c99b156c545b096ba538b1a8c588a", + 
"bde0c4ad4eea4944b76b34ad9c19bb89", + "b9ab6e3935c646e691c0b5143d47d4b3", + "d4a06441bff74e0e9fe8978014660e90", + "a8010eecf9bc4e8ebfc906489ff54543", + "7e9cdfd05f074c1798b8e3d936f6e7de", + "57b3fe293dda483bb3717d7bd3509cce", + "b080c2078a3f4d93b4d8367755d96272", + "a1a7f450bd8d4917b796c6e13a5be9e1", + "e58ff13df5a04fd5b0496e82384fe439", + "ddf5e150f83944bfb07d8f19a177a50a", + "71939dd7929243e38419abec94b209f9", + "fe17e6c350c54a2d85864bb8d6d50d85", + "4a69b6ea437e4682819ed2d0aef048b8", + "99a8e8cb1ad44d5f999c07cf9a913ef4", + "1dd62e85589f4d60912e79dee1b39a3e", + "0a634fec1cf544af82bd17af73bf417a", + "c7e74bf1bb0f4d57ae95aa4397691e01", + "e144ffa2b708446d940bfdf54741c7ab", + "d65f1f5e345546b380c8e9be9d4dfb9b", + "c2e4a8d768d245529bdac929585136c5", + "a4317d864cc4445d8597ed695c3d4c35", + "5429cebc5a28408985824c4f501e050e", + "dc3100a6c9d946568ee0e297934773ff", + "10ecb81d605a4534a13332371ac9041d", + "570b18cdf9034cc780e838724a70904e", + "1915b872ec55435080456092d9ec8717", + "50f682e340cb4e58899aa7e9ea4741db", + "569ec309d2f3490f94c85bbb3680258b", + "dd319b7cb2b7425e849d6682c7f05390", + "fde199a94cc7488690f81d5add9eb08d", + "38d2575bf5144cf3a02d304444bdc481", + "a06cc81bd0114aefb1e80868791b5be8", + "819319dd58b44e999a13b0bd0e78c88e", + "38f581406490488981c76bc7e7e64005", + "2ef62c2bd93c462eb7c4522c8a156e0d", + "c2cb2d57701a4e55b7bfa3f842a91c09", + "4ba3375ddb584f068d2cdcf060cdfa9c", + "125f8ae49e504b809e5e39f6d940204e", + "2c4abe3e713846deb1b0a9bec03298d4", + "b5f524e95a0d42febd4466bb7a8ad239", + "b5b9a98cb74c409cbdc50a16a3393665", + "ec145da746d74c5e896900f7462b630b", + "aa7d2a5d452b4eb1b537542a9731b94a", + "3d03ed01daeb43d58c4f15bc591043ba", + "17b7619e5bfd4d74b1a3bee1c7643e74", + "4c95edb35fab4e12b027248e93b61883", + "7d7bcf713d3b4531846635fe43fb268e", + "918a8b2b832645d18767bc2e4451d556", + "3b8be978c3af4ef4b7bc16420e6a9f8a", + "25efbd99d0134866940cc3fde41aacf7", + "07e8ec9ab2ba4339a8e2736156f28eab", + "7fa4e5411e384c568b37a51bc94b3ee2", + 
"45fa63f56c814015861d05beb8800e09", + "8a73f7fbe20a4d56a76588db2ac35cea", + "3a18d0771ef74923ba209733afdd0e47", + "9104c3953c254878b2020764569613a9", + "8a237f2467734eddace5d9f9aafce9e7", + "a737c2d22cd84141a2f28721ab69d28d", + "be17f6c8dc9c40efa94c3af82a8efa6a", + "7a74e712b53c4a659dc09766885c12d9", + "9afb4b8b3ddf4a0abb75eedbcf3bc7c1", + "27697007fa6d4736a5eb1e1b0eea2d82", + "e5d4b0e78c3740cd8cf9cda0e4a93972", + "2740d759410d435087c4ae0772d6ad73", + "19f5f9369e3043e0984e160c50e0a32e", + "7b488376756843fe84fcce2e7abb5cd9", + "c2745572ac434351ad9b2c9506d8d0b7", + "25119517d9a043ba91fd3fbefb8377a4", + "4079ed7e7f794755afd5daad3f00a34a", + "14f5761bfbd340198267e3986f4035a0" + ] + }, + "id": "BI91BBEJRB0K", + "outputId": "c4853642-832e-4167-e220-2a2d0fd279a8" + }, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " app = GradioInterface()\n", + " app.launch()" + ] + }, + { + "attachments": { + "image-2.png": { + "image/png": "" + }, + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)\n", + "\n", + "![image-2.png](attachment:image-2.png)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week3/community-contributions/Week3-Dataset_Generator-DP.ipynb b/week3/community-contributions/Week3-Dataset_Generator-DP.ipynb new file mode 100644 index 0000000..72c1c84 --- /dev/null +++ b/week3/community-contributions/Week3-Dataset_Generator-DP.ipynb @@ 
-0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c08309b8-13f0-45bb-a3ea-7b01f05a7346", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import pandas as pd\n", + "import random\n", + "import re\n", + "import subprocess\n", + "import pyarrow as pa\n", + "from typing import List\n", + "import openai\n", + "import anthropic\n", + "from dotenv import load_dotenv\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5efd903-e683-4e7f-8747-2998e23a0751", + "metadata": {}, + "outputs": [], + "source": [ + "# load API\n", + "load_dotenv(override=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce49b86a-53f4-4d4f-a721-0d66d9c1b070", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Schema Definition ---\n", + "SCHEMA = [\n", + " (\"Team\", \"TEXT\", '\"Toronto Raptors\"'),\n", + " (\"NAME\", \"TEXT\", '\"Otto Porter Jr.\"'),\n", + " (\"Jersey\", \"TEXT\", '\"10\", or \"NA\" if null'),\n", + " (\"POS\", \"TEXT\", 'One of [\"PF\",\"SF\",\"G\",\"C\",\"SG\",\"F\",\"PG\"]'),\n", + " (\"AGE\", \"INT\", 'integer age in years, e.g., 22'),\n", + " (\"HT\", \"TEXT\", '`6\\' 7\"` or `6\\' 10\"`'),\n", + " (\"WT\", \"TEXT\", '\"232 lbs\"'),\n", + " (\"COLLEGE\", \"TEXT\", '\"Michigan\", or \"--\" if null'),\n", + " (\"SALARY\", \"TEXT\", '\"$9,945,830\", or \"--\" if null')\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93743e57-c2c5-43e5-8fa1-2e242085db07", + "metadata": {}, + "outputs": [], + "source": [ + "# Default schema text for the textbox\n", + "DEFAULT_SCHEMA_TEXT = \"\\n\".join([f\"{i+1}. 
{col[0]} ({col[1]}) Example: {col[2]}\" for i, col in enumerate(SCHEMA)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87c58595-6fdd-48f5-a253-ccba352cb385", + "metadata": {}, + "outputs": [], + "source": [ + "# Available models\n", + "MODELS = [\n", + " \"gpt-4o\",\n", + " \"claude-3-5-haiku-20241022\", \n", + " \"ollama:llama3.2:latest\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08cd9ce2-8685-46b5-95d0-811b8025696f", + "metadata": {}, + "outputs": [], + "source": [ + "# Available file formats\n", + "FILE_FORMATS = [\".csv\", \".tsv\", \".jsonl\", \".parquet\", \".arrow\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d68c7f-6f49-4efa-b075-f1e7db2ab527", + "metadata": {}, + "outputs": [], + "source": [ + "def get_prompt(n: int, schema_text: str, system_prompt: str) -> str:\n", + " prompt = f\"\"\"\n", + "{system_prompt}\n", + "\n", + "Generate {n} rows of realistic basketball player data in JSONL format, each line a JSON object with the following fields:\n", + "\n", + "{schema_text}\n", + "\n", + "Do NOT repeat column values from one row to another.\n", + "\n", + "Only output valid JSONL.\n", + "\"\"\"\n", + " return prompt.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdc68f1e-4fbe-45dc-aa36-ce5f718ef6ca", + "metadata": {}, + "outputs": [], + "source": [ + "# --- LLM Interface ---\n", + "def query_model(prompt: str, model: str = \"gpt-4o\") -> List[dict]:\n", + " \"\"\"Call OpenAI, Claude, or Ollama\"\"\"\n", + " try:\n", + " if model.lower().startswith(\"gpt\"):\n", + " client = openai.OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " temperature=0.7\n", + " )\n", + " content = response.choices[0].message.content\n", + "\n", + " elif model.lower().startswith(\"claude\"):\n", + " client = 
anthropic.Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n", + " response = client.messages.create(\n", + " model=model,\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " max_tokens=4000,\n", + " temperature=0.7\n", + " )\n", + " content = response.content[0].text\n", + "\n", + " elif model.lower().startswith(\"ollama:\"):\n", + " ollama_model = model.split(\":\")[1]\n", + " result = subprocess.run(\n", + " [\"ollama\", \"run\", ollama_model],\n", + " input=prompt,\n", + " text=True,\n", + " capture_output=True\n", + " )\n", + " if result.returncode != 0:\n", + " raise Exception(f\"Ollama error: {result.stderr}\")\n", + " content = result.stdout\n", + " else:\n", + " raise ValueError(\"Unsupported model. Use 'gpt-4.1-mini', 'claude-3-5-haiku-20241022', or 'ollama:llama3.2:latest'\")\n", + "\n", + " # Parse JSONL output\n", + " lines = [line.strip() for line in content.strip().splitlines() if line.strip().startswith(\"{\")]\n", + " return [json.loads(line) for line in lines]\n", + " \n", + " except Exception as e:\n", + " raise Exception(f\"Model query failed: {str(e)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29e3f5f5-e99c-429c-bea9-69d554c58c9c", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Output Formatter ---\n", + "def save_dataset(records: List[dict], file_format: str, filename: str):\n", + " df = pd.DataFrame(records)\n", + " if file_format == \".csv\":\n", + " df.to_csv(filename, index=False)\n", + " elif file_format == \".tsv\":\n", + " df.to_csv(filename, sep=\"\\t\", index=False)\n", + " elif file_format == \".jsonl\":\n", + " with open(filename, \"w\") as f:\n", + " for record in records:\n", + " f.write(json.dumps(record) + \"\\n\")\n", + " elif file_format == \".parquet\":\n", + " df.to_parquet(filename, engine=\"pyarrow\", index=False)\n", + " elif file_format == \".arrow\":\n", + " table = pa.Table.from_pandas(df)\n", + " with pa.OSFile(filename, \"wb\") as sink:\n", + " with 
pa.ipc.new_file(sink, table.schema) as writer:\n", + " writer.write(table)\n", + " else:\n", + " raise ValueError(\"Unsupported file format\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe258e84-66f4-4fe7-99c0-75b24148e147", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Main Generation Function ---\n", + "def generate_dataset(schema_text, system_prompt, model, nr_records, file_format, save_as):\n", + " try:\n", + " # Validation\n", + " if nr_records <= 10:\n", + " return \"❌ Error: Nr_records must be greater than 10.\", None\n", + " \n", + " if file_format not in FILE_FORMATS:\n", + " return \"❌ Error: Invalid file format specified.\", None\n", + " \n", + " if not save_as or save_as.strip() == \"\":\n", + " save_as = f\"basketball_dataset{file_format}\"\n", + " elif not save_as.endswith(file_format):\n", + " save_as = save_as + file_format\n", + " \n", + " # Generate prompt\n", + " prompt = get_prompt(nr_records, schema_text, system_prompt)\n", + " \n", + " # Query model\n", + " records = query_model(prompt, model=model)\n", + " \n", + " if not records:\n", + " return \"❌ Error: No valid records generated from the model.\", None\n", + " \n", + " # Save dataset\n", + " save_dataset(records, file_format, save_as)\n", + " \n", + " # Create preview\n", + " df = pd.DataFrame(records)\n", + " preview = df.head(10) # Show first 10 rows\n", + " \n", + " success_message = f\"✅ Dataset generated successfully!\\n📁 Saved to: {save_as}\\n📊 Generated {len(records)} records\"\n", + " \n", + " return success_message, preview\n", + " \n", + " except Exception as e:\n", + " return f\"❌ Error: {str(e)}\", None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2405a9d-b4cd-43d9-82f6-ff3512b4541f", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Gradio Interface ---\n", + "def create_interface():\n", + " with gr.Blocks(title=\"Dataset Generator\", theme=gr.themes.Soft()) as interface:\n", + " gr.Markdown(\"# 
Dataset Generator\")\n", + " gr.Markdown(\"Generate realistic datasets using AI models\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=2):\n", + " schema_input = gr.Textbox(\n", + " label=\"Schema\",\n", + " value=DEFAULT_SCHEMA_TEXT,\n", + " lines=15,\n", + " placeholder=\"Define your dataset schema here...\"\n", + " )\n", + " \n", + " system_prompt_input = gr.Textbox(\n", + " label=\"Prompt\",\n", + " value=\"You are a helpful assistant that generates realistic basketball player data.\",\n", + " lines=1,\n", + " placeholder=\"Enter system prompt for the model...\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " model_dropdown = gr.Dropdown(\n", + " label=\"Model\",\n", + " choices=MODELS,\n", + " value=MODELS[1], # Default to Claude\n", + " interactive=True\n", + " )\n", + " \n", + " nr_records_input = gr.Number(\n", + " label=\"Nr. records\",\n", + " value=25,\n", + " minimum=11,\n", + " maximum=1000,\n", + " step=1\n", + " )\n", + " \n", + " with gr.Row():\n", + " file_format_dropdown = gr.Dropdown(\n", + " label=\"File format\",\n", + " choices=FILE_FORMATS,\n", + " value=\".csv\",\n", + " interactive=True\n", + " )\n", + " \n", + " save_as_input = gr.Textbox(\n", + " label=\"Save as\",\n", + " value=\"basketball_dataset\",\n", + " placeholder=\"Enter filename (extension will be added automatically)\"\n", + " )\n", + " \n", + " generate_btn = gr.Button(\"🚀 Generate\", variant=\"primary\", size=\"lg\")\n", + " \n", + " with gr.Column(scale=1):\n", + " output_status = gr.Textbox(\n", + " label=\"Status\",\n", + " lines=4,\n", + " interactive=False\n", + " )\n", + " \n", + " output_preview = gr.Dataframe(\n", + " label=\"Preview (First 10 rows)\",\n", + " interactive=False,\n", + " wrap=True\n", + " )\n", + " \n", + " # Connect the generate button\n", + " generate_btn.click(\n", + " fn=generate_dataset,\n", + " inputs=[\n", + " schema_input,\n", + " system_prompt_input, \n", + " model_dropdown,\n", + " nr_records_input,\n", + " 
file_format_dropdown,\n", + " save_as_input\n", + " ],\n", + " outputs=[output_status, output_preview]\n", + " )\n", + " \n", + " gr.Markdown(\"\"\"\n", + " ### 📝 Instructions:\n", + " 1. **Schema**: Define the structure of your dataset (pre-filled with basketball player schema)\n", + " 2. **Prompt**: System prompt to guide the AI model\n", + " 3. **Model**: Choose between GPT, Claude, or Ollama models\n", + " 4. **Nr. records**: Number of records to generate (minimum 11)\n", + " 5. **File format**: Choose output format (.csv, .tsv, .jsonl, .parquet, .arrow)\n", + " 6. **Save as**: Filename (extension added automatically)\n", + " 7. Click **Generate** to create your dataset\n", + " \n", + " ### 🔧 Requirements:\n", + " - Set up your API keys in `.env` file (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`)\n", + " - For Ollama models, ensure Ollama is installed and running locally\n", + " \"\"\")\n", + " \n", + " return interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50fd2b91-2578-4224-b9dd-e28caf6a0a85", + "metadata": {}, + "outputs": [], + "source": [ + "interface = create_interface()\n", + "interface.launch(inbrowser=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week3/community-contributions/muawiya/README.md b/week3/community-contributions/muawiya/README.md new file mode 100644 index 0000000..fea9832 --- /dev/null +++ b/week3/community-contributions/muawiya/README.md @@ -0,0 +1,102 @@ +# 🧠 Synthetic Data Generator + +A Python-based tool to generate structured, synthetic job postings using open-source LLMs from Hugging Face. 
+This project supports both **script-based execution** and an **interactive Colab notebook**, making it ideal for rapid prototyping, dataset bootstrapping, or demonstrating prompt engineering techniques. + +> Note: Original Repo can be found at: https://github.com/moawiah/synthetic_data_generator + + +![Demo Screenshot](https://github.com/user-attachments/assets/c0e229ac-ddb7-4a37-8088-f04ca735cd81) + + +This tool helps: +- Researchers create labeled training data for NLP classification or QA +- HR tech startups prototype recommendation models +- AI instructors demonstrate few-shot prompting in class + + +--- + +## ✨ Features + +- 🔗 Integrates Hugging Face Transformer models +- 📄 Generates realistic job postings in structured JSON format +- 🧪 Supports prompt engineering with control over output length and variability +- 🧠 Minimal Gradio UI for non-technical users +- 📓 Jupyter/Colab support for experimentation and reproducibility + +## 📂 Project Structure +
 ```
+.
+├── app/
+│   ├── app.py                          # Main script entry point
+│   ├── consts.py                       # Configuration and constants
+│   └── requirements.txt                # Python dependencies
+│
+├── data/
+│   └── software_engineer_jobs.json     # Sample input data (JSON format)
+│
+├── notebooks/
+│   └── synthetic_data_generator.ipynb  # Interactive Colab notebook
+│
+├── .env.example                        # Sample environment variable config
+├── .gitignore                          # Git ignored files list
+└── README.md
+```
+
+ +## 🚀 Getting Started + +### 1. Clone the repository +```bash +git clone https://github.com/moawiah/synthetic_data_generator.git +cd synthetic_data_generator +``` +### Install Dependencies +```bash +pip install -r app/requirements.txt +``` +### Hugging Face Token +You need to create a `.env` file with your HuggingFace token like `HF_TOKEN=your-token-here` + +### Run +Run the app using: +`python app/app.py` + + +## Example Output - 1 Job + +```JSON +{ +"title": "Software Engineer" +, +"description": "We are seeking a highly skilled software engineer to join our team and contribute to the development of innovative software solutions. The ideal candidate will have experience in designing, coding, and testing software systems, and will be able to work collaboratively with cross-functional teams. Responsibilities include writing clean, maintainable, and efficient code, as well as actively participating in code reviews and continuous integration processes. This is an excellent opportunity for a self-starter with a passion for technology and a desire to grow in their career." 
+, +"requirements":[ +"Bachelor's degree in Computer Science or related field", +"Minimum of 2 years experience in software development", +"Strong proficiency in Java or C++", +"Experience with agile development methodologies", +"Good understanding of data structures and algorithms", +"Excellent problem-solving and analytical skills" +], +"location":"New York, NY", +"company_name":"ABC Technologies" +} + +``` + + +## Future Improvements +🔁 Add support for more job roles and industries + +🧠 Model selector from UI + +💾 Export dataset as CSV + +☁️ Optional integration with LangChain or RAG workflows + + + + + diff --git a/week3/community-contributions/muawiya/app/app.py b/week3/community-contributions/muawiya/app/app.py new file mode 100644 index 0000000..4b3fc79 --- /dev/null +++ b/week3/community-contributions/muawiya/app/app.py @@ -0,0 +1,156 @@ +import os +import requests +from IPython.display import Markdown, display, update_display +from openai import OpenAI +from google.colab import drive +from huggingface_hub import login +from google.colab import userdata +from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, pipeline, TextGenerationPipeline +import torch +from consts import FALCON, MISTRAL, Databricks +from dotenv import load_dotenv +import json +import ast +import gradio as gr +import re + +# Sign in to HuggingFace Hub +load_dotenv() +hf_token = os.getenv("HF_TOKEN") + + +# Main Prompt +prompt = """ +Generate one fake job posting for a {{role}}. + +Return only a single JSON object with: +- title +- description (5-10 sentences) +- requirements (array of 4-6 strings) +- location +- company_name + +No explanations, no extra text. +Only the JSON object. 
+""" + +# Main Conf +bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_quant_type="nf4" +) + +def load_model_and_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained(MISTRAL, trust_remote_code=True) + + model = AutoModelForCausalLM.from_pretrained( + MISTRAL, + device_map={"": "cuda"}, + trust_remote_code=True, + offload_folder="/tmp/dolly_offload", + quantization_config=bnb_config + ) + + return model, tokenizer + + +def generate_job(role="Software Engineer", model=None, tokenizer=None): + # prompt = prompt.format(role=role, n=n) + # outputs = generator(prompt, max_new_tokens=500, do_sample=True, temperature=0.9) + # return outputs[0]['generated_text'] + + # Apply chat template formatting + # inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device) + inputs = tokenizer(prompt.format(role=role), return_tensors="pt") + inputs = {k: v.to(model.device) for k, v in inputs.items()} + + + # Generate output + outputs = model.generate( + **inputs, + max_new_tokens=600, + do_sample=True, + temperature=0.2, + top_p=0.9, + pad_token_id=tokenizer.eos_token_id + ) + + # Decode and return + result = tokenizer.decode(outputs[0], skip_special_tokens=True) + return result + +def generate_jobs(role="Software Engineer", n=5): + model, tokenizer = load_model_and_tokenizer() + role = "Software Engineer" + fake_jobs = [] + for i in range(n): + fake_jobs.append(generate_job(role=role, model=model, tokenizer=tokenizer)) + return fake_jobs + +def extract_json_objects_from_text_block(texts): + """ + Accepts either a single string or a list of strings. + Extracts all valid JSON objects from messy text blocks. 
+ """ + if isinstance(texts, str): + texts = [texts] # wrap in list if single string + + pattern = r"\{[\s\S]*?\}" + results = [] + + for raw_text in texts: + matches = re.findall(pattern, raw_text) + for match in matches: + try: + obj = json.loads(match) + results.append(obj) + except json.JSONDecodeError: + continue + + return results + +def generate_ui(role, n): + try: + raw_jobs = generate_jobs(role, n) + parsed_jobs = extract_json_objects_from_text_block(raw_jobs) + + if not isinstance(parsed_jobs, list) or not all(isinstance(item, dict) for item in parsed_jobs): + print("[ERROR] Parsed result is not a list of dicts") + return gr.update(value=[], visible=True), None + + filename = f"data/{role.replace(' ', '_').lower()}_jobs.json" + with open(filename, "w") as f: + json.dump(parsed_jobs, f, indent=2) + + print(f"[INFO] Returning {len(parsed_jobs)} jobs -> {filename}") + return parsed_jobs, filename + + except Exception as e: + print(f"[FATAL ERROR] {e}") + return gr.update(value=[], visible=True), None + + +if __name__ == "__main__": + with gr.Blocks() as demo: + gr.Markdown("# 🧠 Synthetic Job Dataset Generator") + gr.Markdown("Generate a structured dataset of job postings for a specific role.") + + with gr.Row(): + role_input = gr.Textbox(label="Job Role", placeholder="e.g. 
Software Engineer", value="Software Engineer") + n_input = gr.Number(label="Number of Samples", value=5, precision=0) + + generate_button = gr.Button("🚀 Generate") + output_table = gr.JSON(label="Generated Dataset") + download_button = gr.File(label="Download JSON") + + generate_button.click( + generate_ui, + inputs=[role_input, n_input], + outputs=[output_table, download_button] + ) + + demo.launch(debug=True, share=True) + + diff --git a/week3/community-contributions/muawiya/app/consts.py b/week3/community-contributions/muawiya/app/consts.py new file mode 100644 index 0000000..b62eb2d --- /dev/null +++ b/week3/community-contributions/muawiya/app/consts.py @@ -0,0 +1,5 @@ +# Models +GPT = 'gpt2' +FALCON = "tiiuae/falcon-rw-1b" +MISTRAL = "mistralai/Mistral-7B-Instruct-v0.1" +Databricks = "databricks/dolly-v2-3b" \ No newline at end of file diff --git a/week3/community-contributions/muawiya/app/requirements.txt b/week3/community-contributions/muawiya/app/requirements.txt new file mode 100644 index 0000000..9590dce --- /dev/null +++ b/week3/community-contributions/muawiya/app/requirements.txt @@ -0,0 +1,7 @@ +huggingface_hub==0.30.2 +ipython==8.12.3 +openai==1.76.2 +protobuf==6.30.2 +Requests==2.32.3 +torch==2.6.0+cu124 +transformers==4.51.3 \ No newline at end of file diff --git a/week3/community-contributions/muawiya/data/software_engineer_jobs.json b/week3/community-contributions/muawiya/data/software_engineer_jobs.json new file mode 100644 index 0000000..1a09d49 --- /dev/null +++ b/week3/community-contributions/muawiya/data/software_engineer_jobs.json @@ -0,0 +1,71 @@ +[ + { + "title": "Software Engineer", + "description": "We are seeking a highly skilled software engineer to join our team in developing and maintaining complex software systems. The ideal candidate will have a strong background in computer science and experience with multiple programming languages. 
Responsibilities include writing clean and efficient code, collaborating with cross-functional teams, and actively participating in code reviews. This is an excellent opportunity for a self-starter with a passion for technology and a desire to grow in their career.", + "requirements": [ + "Bachelor's degree in Computer Science or related field", + "3+ years of experience in software development", + "Strong proficiency in Java or C++", + "Experience with agile development methodologies", + "Excellent problem-solving and analytical skills" + ], + "location": "New York, NY", + "company_name": "ABC Technologies" + }, + { + "title": "Software Engineer", + "description": "We are looking for a highly skilled software engineer to join our team and contribute to the development of innovative software solutions. The ideal candidate will have experience in designing, developing, and testing software systems, and be able to work independently or as part of a team. Responsibilities include writing clean and efficient code, collaborating with cross-functional teams, and actively participating in code reviews. Must have a strong understanding of computer science principles and be able to learn quickly. This is a full-time position located in San Francisco, CA.", + "requirements": [ + "Bachelor's degree in Computer Science or related field", + "3+ years of experience in software development", + "Strong proficiency in Java or C++", + "Experience with agile development methodologies", + "Excellent problem-solving skills", + "Ability to work in a fast-paced environment" + ], + "location": "San Francisco, CA", + "company_name": "Acme Inc." + }, + { + "title": "Software Engineer", + "description": "We are seeking a highly skilled software engineer to join our team in developing and maintaining our cutting-edge software applications. 
The ideal candidate will have a strong background in computer science and software engineering, with experience in designing, coding, and testing software systems. Responsibilities include collaborating with cross-functional teams, writing clean and efficient code, and ensuring the timely delivery of high-quality software products. This is an excellent opportunity for a self-starter with a passion for technology and a desire to work in a dynamic and fast-paced environment.", + "requirements": [ + "Bachelor's degree in Computer Science or related field", + "3+ years of experience in software engineering", + "Strong proficiency in Java, Python, or C++", + "Experience with agile development methodologies", + "Excellent problem-solving and analytical skills", + "Strong communication and interpersonal skills" + ], + "location": "New York, NY", + "company_name": "ABC Tech" + }, + { + "title": "Software Engineer", + "description": "We are seeking a highly skilled software engineer to join our team and contribute to the development of innovative software solutions. The ideal candidate will have a strong background in computer science and experience with various programming languages and technologies. Responsibilities include designing, coding, testing, and maintaining software systems, as well as collaborating with cross-functional teams. This is an excellent opportunity for a creative and motivated individual to make a significant impact in the tech industry.", + "requirements": [ + "Bachelor's degree in Computer Science or related field", + "Minimum of 2 years experience in software development", + "Strong proficiency in Java, Python, or C++", + "Experience with agile development methodologies", + "Excellent problem-solving and analytical skills", + "Ability to work independently and as part of a team", + "Strong communication and interpersonal skills" + ], + "location": "New York, NY", + "company_name": "ABC Tech Inc." 
+ }, + { + "title": "Software Engineer", + "description": "We are looking for a skilled software engineer to join our team and contribute to the development of innovative software solutions. Responsibilities include designing, coding, testing and maintaining software systems, as well as collaborating with cross-functional teams. The ideal candidate will have a strong background in computer science or a related field, and at least 3 years of experience in software development. Must be proficient in multiple programming languages, including Java, Python, and C++. Strong problem-solving skills and the ability to work independently or as part of a team are required. This is a full-time position located in San Francisco, CA.", + "requirements": [ + "Bachelor's degree in Computer Science or related field", + "At least 3 years of experience in software development", + "Proficiency in Java, Python, and C++", + "Strong problem-solving skills", + "Ability to work independently or as part of a team" + ], + "location": "San Francisco, CA", + "company_name": "Innovative Solutions Inc." 
+ } +] \ No newline at end of file diff --git a/week3/community-contributions/muawiya/notebooks/synthetic_data_generator.ipynb b/week3/community-contributions/muawiya/notebooks/synthetic_data_generator.ipynb new file mode 100644 index 0000000..09f6f9e --- /dev/null +++ b/week3/community-contributions/muawiya/notebooks/synthetic_data_generator.ipynb @@ -0,0 +1,5509 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "machine_shape": "hm", + "gpuType": "A100" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "1d1fe06ac632475086ed5964ed000360": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c138f597c98c4944b54d36510ecc8e0b", + "IPY_MODEL_bef2531516164e85bb79b86a791dd00d", + "IPY_MODEL_1cb9fc011950479a8d4832bc52c3399c" + ], + "layout": "IPY_MODEL_974e8f7f05ef472d85d5ea71425e6c39" + } + }, + "c138f597c98c4944b54d36510ecc8e0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_696090959af8499e9a38777e664b85c1", + "placeholder": "​", + "style": 
"IPY_MODEL_973bcc9740b4426da4c680d11f3c1f7e", + "value": "tokenizer_config.json: 100%" + } + }, + "bef2531516164e85bb79b86a791dd00d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3cb5d8fdb5fb4b6a99f6733c00df8378", + "max": 2103, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_58f4369c68434d569d5eb1bc36e71775", + "value": 2103 + } + }, + "1cb9fc011950479a8d4832bc52c3399c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a05df972876941e3b6faab56cc30a4b8", + "placeholder": "​", + "style": "IPY_MODEL_9c61d90b63dd4fb5a481282d6d6eb8e8", + "value": " 2.10k/2.10k [00:00<00:00, 182kB/s]" + } + }, + "974e8f7f05ef472d85d5ea71425e6c39": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, 
+ "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "696090959af8499e9a38777e664b85c1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"973bcc9740b4426da4c680d11f3c1f7e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3cb5d8fdb5fb4b6a99f6733c00df8378": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "58f4369c68434d569d5eb1bc36e71775": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a05df972876941e3b6faab56cc30a4b8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9c61d90b63dd4fb5a481282d6d6eb8e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2b71f87a02a540488a9e07f072f8807a": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_548cd7e9fab54470bc52810f27784760", + "IPY_MODEL_9c5eb078ece84a57aa9c402c9cad3b0b", + "IPY_MODEL_ee00a9f599db4affabb7bf1c4df6ca1a" + ], + "layout": "IPY_MODEL_52bd638607bf4e1aaf224ebdcfa3693d" + } + }, + "548cd7e9fab54470bc52810f27784760": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_771619a5acd343c788b8189167af09d4", + "placeholder": "​", + "style": "IPY_MODEL_09a1b30b5659452f95ebb2e72466c750", + "value": "tokenizer.model: 100%" + } + }, + "9c5eb078ece84a57aa9c402c9cad3b0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_145a1f1032a44079a262db381e60d401", + "max": 493443, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_99888ad83b51485f959f977ba4418119", + 
"value": 493443 + } + }, + "ee00a9f599db4affabb7bf1c4df6ca1a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ec0854c2ea9a4c9280b6876df365db9d", + "placeholder": "​", + "style": "IPY_MODEL_dac5892c85214f69a5d75d5dc4858dfe", + "value": " 493k/493k [00:00<00:00, 7.91MB/s]" + } + }, + "52bd638607bf4e1aaf224ebdcfa3693d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "771619a5acd343c788b8189167af09d4": { + 
"model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09a1b30b5659452f95ebb2e72466c750": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "145a1f1032a44079a262db381e60d401": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "99888ad83b51485f959f977ba4418119": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ec0854c2ea9a4c9280b6876df365db9d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dac5892c85214f69a5d75d5dc4858dfe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "41b669da565e4204b848b754dfa28ac8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e806afdada48418c9e353b94a38cd703", + "IPY_MODEL_7898b7322b014e96984c3d09a29a57fb", + "IPY_MODEL_d665270b05d64effba568ded85eee1b4" + ], + "layout": "IPY_MODEL_df087de9ade24058b1cf32e1556f7cb6" + } + }, + "e806afdada48418c9e353b94a38cd703": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_584330ab439b4887b1050a7f14dc5d7c", + "placeholder": "​", + "style": "IPY_MODEL_880b32d3bd1d4af8b5d0b449aab87e8b", + "value": "tokenizer.json: 100%" + } + }, + "7898b7322b014e96984c3d09a29a57fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_97d09f016e274cca93927f3bd8329352", + "max": 1795188, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d87ef5878c0f4211809716674d0d8413", + "value": 1795188 + } + }, + "d665270b05d64effba568ded85eee1b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_556109848b1c4ebc99a6cc7c0be519e0", + "placeholder": "​", + "style": "IPY_MODEL_8d6cdfd75e3f4a628c9e785d3c469d98", + "value": " 1.80M/1.80M [00:00<00:00, 24.9MB/s]" + } + }, + "df087de9ade24058b1cf32e1556f7cb6": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "584330ab439b4887b1050a7f14dc5d7c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "880b32d3bd1d4af8b5d0b449aab87e8b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "97d09f016e274cca93927f3bd8329352": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d87ef5878c0f4211809716674d0d8413": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "556109848b1c4ebc99a6cc7c0be519e0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d6cdfd75e3f4a628c9e785d3c469d98": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": 
"1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fb1ff6f4482143c39be1cca57ec2fc8b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_83e6421843ad487c91bc75510b90f198", + "IPY_MODEL_9e74a7b74e1a4b119af5b95d572bac3c", + "IPY_MODEL_080c34ad56c84c229b1555b15b354aad" + ], + "layout": "IPY_MODEL_d968bf43e8574d9090326b31c9a7fd93" + } + }, + "83e6421843ad487c91bc75510b90f198": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e78b05f33ee54c968fd87b77a2470bce", + "placeholder": "​", + "style": "IPY_MODEL_79a201f7ab7e49efa9e3e1504012dec2", + "value": "special_tokens_map.json: 100%" + } + }, + "9e74a7b74e1a4b119af5b95d572bac3c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e5d431074de4955a97d4ea36621ae36", + "max": 414, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bfc581362fbc4aca85df7b2a943dd5e4", + "value": 414 + } + }, + "080c34ad56c84c229b1555b15b354aad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bc9b585bfd2847bb9f22c4720bd19033", + "placeholder": "​", + "style": "IPY_MODEL_8addd2418c3049f3be32465cc9a408d4", + "value": " 414/414 [00:00<00:00, 52.5kB/s]" + } + }, + "d968bf43e8574d9090326b31c9a7fd93": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e78b05f33ee54c968fd87b77a2470bce": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "79a201f7ab7e49efa9e3e1504012dec2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "6e5d431074de4955a97d4ea36621ae36": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bfc581362fbc4aca85df7b2a943dd5e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bc9b585bfd2847bb9f22c4720bd19033": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8addd2418c3049f3be32465cc9a408d4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c7b5bb9ef22f4ebe9969d4d10d63d24c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_d8c3f3ec329743f6b2f21d21601f092a", + "IPY_MODEL_2fee19152ef34eeaba541d559b9a0bc0", + "IPY_MODEL_2740de6be1ae4e3bacc642c39828883b" + ], + "layout": "IPY_MODEL_4104813265f34db0ab09c9d6c148ba29" + } + }, + "d8c3f3ec329743f6b2f21d21601f092a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6d2dbad5a0984f8382abd18910c14343", + "placeholder": "​", + "style": "IPY_MODEL_32285185818f40a6b07c6d6f6175b70c", + "value": "config.json: 100%" + } + }, + "2fee19152ef34eeaba541d559b9a0bc0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_79da3c26e0fb4405a198c2255df9ec00", + "max": 571, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c95bea4e04ff49078821a5dd67f0c28a", + "value": 571 + } + }, + "2740de6be1ae4e3bacc642c39828883b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3695b9dde85348efb683e31e5d52e210", + "placeholder": "​", + "style": "IPY_MODEL_1d982bed2d4645b8a19295b7812cef49", + "value": " 571/571 [00:00<00:00, 72.5kB/s]" + } + }, + "4104813265f34db0ab09c9d6c148ba29": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6d2dbad5a0984f8382abd18910c14343": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, 
+ "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "32285185818f40a6b07c6d6f6175b70c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "79da3c26e0fb4405a198c2255df9ec00": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, 
+ "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c95bea4e04ff49078821a5dd67f0c28a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3695b9dde85348efb683e31e5d52e210": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1d982bed2d4645b8a19295b7812cef49": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "32c58f50bb1c44e085ae3663004fcfff": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c4df70cf509541828d3a06c380fdfe3d", + "IPY_MODEL_abd2737f597f48b0846a74c743307917", + "IPY_MODEL_a2a52b5e3c104e1cbec513a9f8744db2" + ], + "layout": "IPY_MODEL_ba57460b8ee24f4e96f8a603914b7073" + } + }, + "c4df70cf509541828d3a06c380fdfe3d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d17cd0e49fa94361894660c0645ec9a8", + "placeholder": "​", + "style": "IPY_MODEL_6cd364a43f6f4ea793b05bf14ee9d687", + "value": 
"model.safetensors.index.json: 100%" + } + }, + "abd2737f597f48b0846a74c743307917": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a80f72a5e41047f1898d5b6f00a2c69b", + "max": 25125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c6f6fca0f35b44fbb9037337a5bc0431", + "value": 25125 + } + }, + "a2a52b5e3c104e1cbec513a9f8744db2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3d07e648a5644742b8112146e952c44a", + "placeholder": "​", + "style": "IPY_MODEL_bff978fcc6f94f55bf605c6d9c23cfd2", + "value": " 25.1k/25.1k [00:00<00:00, 2.73MB/s]" + } + }, + "ba57460b8ee24f4e96f8a603914b7073": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d17cd0e49fa94361894660c0645ec9a8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6cd364a43f6f4ea793b05bf14ee9d687": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a80f72a5e41047f1898d5b6f00a2c69b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6f6fca0f35b44fbb9037337a5bc0431": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3d07e648a5644742b8112146e952c44a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bff978fcc6f94f55bf605c6d9c23cfd2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eca24e648bcf4cc684f15da684e2791d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_dc82b611b8c145eb8ebc7b80073e9ae1", + "IPY_MODEL_f3e6040a241c4ac7b715bb07a9ec6d6b", + "IPY_MODEL_e310ab9f4338443e82d257ddc21f48bb" + ], + "layout": "IPY_MODEL_9dd0e53a7a2a4d668c5640d938b71c9f" + } + }, + "dc82b611b8c145eb8ebc7b80073e9ae1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1fc933b90fa546c884181136373ad005", + "placeholder": "​", + "style": "IPY_MODEL_94f3ee73e2c04092ac5522c6ef038ea1", + "value": "Fetching 2 files: 100%" + } + }, + "f3e6040a241c4ac7b715bb07a9ec6d6b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_81d8563026e04f5ab00eced0da89a7ef", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_95f081aaf9e84c2f91c82a4e2f183009", + "value": 2 + } + }, + "e310ab9f4338443e82d257ddc21f48bb": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_965cfc093b5040bbaec177820e45ec95", + "placeholder": "​", + "style": "IPY_MODEL_d328397d81f343e28dd1a6e52c5f0ae7", + "value": " 2/2 [00:46<00:00, 46.46s/it]" + } + }, + "9dd0e53a7a2a4d668c5640d938b71c9f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1fc933b90fa546c884181136373ad005": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94f3ee73e2c04092ac5522c6ef038ea1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "81d8563026e04f5ab00eced0da89a7ef": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "95f081aaf9e84c2f91c82a4e2f183009": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "965cfc093b5040bbaec177820e45ec95": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d328397d81f343e28dd1a6e52c5f0ae7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f73f9c7f341c4a99b00585343bf4d4bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2a11e010825b42d4a949ad64ae0d1933", + "IPY_MODEL_15b769156f6a4d2988f1c09f3820f7ef", + "IPY_MODEL_a0484e3846c647b892d2de3797496605" + ], + "layout": "IPY_MODEL_cb042f80aaf04bf1963d637d1771741e" + } + }, + "2a11e010825b42d4a949ad64ae0d1933": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_852ba7d4221a475488411f5014362496", + "placeholder": "​", + "style": "IPY_MODEL_38dc7c1e65324e3097d8738532272e32", + "value": "model-00001-of-00002.safetensors: 100%" + } + }, + "15b769156f6a4d2988f1c09f3820f7ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_613da14abc24460db3bb337886cb407c", + "max": 9942981696, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_37a495a5836f413ea5f662538d51a939", + "value": 9942981696 + } + }, + "a0484e3846c647b892d2de3797496605": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9c61322c006f465385df301121462e82", + "placeholder": "​", + "style": "IPY_MODEL_d93d0bb6ebc943a1be6902bd88cef441", + "value": " 9.94G/9.94G [00:46<00:00, 246MB/s]" + } + }, + "cb042f80aaf04bf1963d637d1771741e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "852ba7d4221a475488411f5014362496": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38dc7c1e65324e3097d8738532272e32": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "613da14abc24460db3bb337886cb407c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "37a495a5836f413ea5f662538d51a939": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9c61322c006f465385df301121462e82": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d93d0bb6ebc943a1be6902bd88cef441": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "50a2a1bd13db4045a4ae01138470c42b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ad7cba643d1742cdb47c433bf50072f9", + "IPY_MODEL_57ef5d067e7343239525a6da237b29eb", + "IPY_MODEL_7567388a58a340d4a0f384f79ee13ddc" + ], + "layout": "IPY_MODEL_52c2896ab41a4d2592484084cb501e5a" + } + }, + "ad7cba643d1742cdb47c433bf50072f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_22957622a42345b991371153c29583c4", + "placeholder": "​", + "style": "IPY_MODEL_d34c879607b041739a2cc6273509e330", + "value": "model-00002-of-00002.safetensors: 100%" + } + }, + "57ef5d067e7343239525a6da237b29eb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d1e7bdd4faac4765862fc809017c4856", + "max": 4540516344, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fcb0ad846398455faccf0d797549f589", + "value": 4540516344 + } + }, + "7567388a58a340d4a0f384f79ee13ddc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b381226552c9462d858051fcb7240727", + "placeholder": "​", + "style": "IPY_MODEL_94e630795bc247e08e6af434c5924cdd", + "value": " 4.54G/4.54G [00:23<00:00, 248MB/s]" + } + }, + "52c2896ab41a4d2592484084cb501e5a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": 
null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "22957622a42345b991371153c29583c4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d34c879607b041739a2cc6273509e330": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"d1e7bdd4faac4765862fc809017c4856": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fcb0ad846398455faccf0d797549f589": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b381226552c9462d858051fcb7240727": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94e630795bc247e08e6af434c5924cdd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2b496c218e2049ff9156ff5b3bbdb90b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_62d3b35a3924417894094d3bbf993932", + "IPY_MODEL_41737448e98a48dcbe117351645395de", 
+ "IPY_MODEL_e83735cd79674a3482f0b90d4c9a3e3d" + ], + "layout": "IPY_MODEL_eff6ca539e2947e9b2987977f143de9a" + } + }, + "62d3b35a3924417894094d3bbf993932": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aa75292545a649eda8cb7bab0ac9bbcd", + "placeholder": "​", + "style": "IPY_MODEL_22c0e2213505435eaeebdfe330b8fbb8", + "value": "Loading checkpoint shards: 100%" + } + }, + "41737448e98a48dcbe117351645395de": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7de820edeeaf4210af68c721bab3082d", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_66ef664b717343bdaf8e5c4610b2a678", + "value": 2 + } + }, + "e83735cd79674a3482f0b90d4c9a3e3d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": 
null, + "layout": "IPY_MODEL_f09534cbda8c4e91b2e073c0eca0cb96", + "placeholder": "​", + "style": "IPY_MODEL_7d8b5a2a52aa4957bc5905021898d8f4", + "value": " 2/2 [00:17<00:00,  8.24s/it]" + } + }, + "eff6ca539e2947e9b2987977f143de9a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa75292545a649eda8cb7bab0ac9bbcd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "22c0e2213505435eaeebdfe330b8fbb8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7de820edeeaf4210af68c721bab3082d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66ef664b717343bdaf8e5c4610b2a678": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f09534cbda8c4e91b2e073c0eca0cb96": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d8b5a2a52aa4957bc5905021898d8f4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1c68a822580a4960acad93be9fd48ce3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6df81f91b17f41dc91fc9f367fa0afab", + "IPY_MODEL_17742936c9ac46e588d1ce42235745d0", + "IPY_MODEL_17f0cd6f05184164b48ef906f192505a" + ], + "layout": "IPY_MODEL_936a67f2de2e44728b83600f4fa0569c" + } + }, + "6df81f91b17f41dc91fc9f367fa0afab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d5ad82f6b9654a8cb888613caaaaa097", + "placeholder": "​", + "style": "IPY_MODEL_b014979e237344129545ff2c384c1c1c", + "value": "generation_config.json: 100%" + } + }, + "17742936c9ac46e588d1ce42235745d0": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b99c12d57d4a4eab84aefbef58452c32", + "max": 116, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5923bbdcf6334393ad832765f129bdec", + "value": 116 + } + }, + "17f0cd6f05184164b48ef906f192505a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_260ac8c28531450bba1deac4e4669dc4", + "placeholder": "​", + "style": "IPY_MODEL_067959a4ef614c498c28bb83c10e16de", + "value": " 116/116 [00:00<00:00, 15.6kB/s]" + } + }, + "936a67f2de2e44728b83600f4fa0569c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d5ad82f6b9654a8cb888613caaaaa097": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b014979e237344129545ff2c384c1c1c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b99c12d57d4a4eab84aefbef58452c32": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5923bbdcf6334393ad832765f129bdec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "260ac8c28531450bba1deac4e4669dc4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "067959a4ef614c498c28bb83c10e16de": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b5dd409cf6e04764adbb7c2a49b7be86": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_65187b4ebb2041b39778268e8b4d6b0d", + "IPY_MODEL_33317cac10ca4a98bf4433c1eff43435", + "IPY_MODEL_f81f5402902c4c04b10895782287e908" + ], + "layout": "IPY_MODEL_c471914fe0d34ae8967bac2820637d5b" + } + }, + "65187b4ebb2041b39778268e8b4d6b0d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7aead6f6cffa40a383f1b8c64943329e", + "placeholder": "​", + "style": "IPY_MODEL_f24fe57d8e164fd68185b4c117e7c097", + "value": "Loading checkpoint shards: 100%" + } + }, + "33317cac10ca4a98bf4433c1eff43435": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f913ca9ab6d44ab1b788a36bd964ed39", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ed34016801264a05bb3697eca2ac22ef", + "value": 2 + } + }, + "f81f5402902c4c04b10895782287e908": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fe622254072540fda3b0dd6b2cab6e4a", + "placeholder": "​", + "style": "IPY_MODEL_5d95bdea47594e21855a6e564d0760da", + "value": " 2/2 [00:17<00:00,  8.01s/it]" + } + }, + "c471914fe0d34ae8967bac2820637d5b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7aead6f6cffa40a383f1b8c64943329e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f24fe57d8e164fd68185b4c117e7c097": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f913ca9ab6d44ab1b788a36bd964ed39": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": 
null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed34016801264a05bb3697eca2ac22ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fe622254072540fda3b0dd6b2cab6e4a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5d95bdea47594e21855a6e564d0760da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f9defc39ac5437e9512e5fad810b409": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1c126dfdc51c438b9b48c8a65e549ae2", + "IPY_MODEL_741d800130ea4830b9266f467fa6a0bf", + "IPY_MODEL_73c0a01f1693471c9c017143e9e9058b" + ], + "layout": "IPY_MODEL_ab8174c1337b43048e05aeca72ca18ef" + } + }, + "1c126dfdc51c438b9b48c8a65e549ae2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e5a992d86434e62a25fc9b7f75f4b16", + "placeholder": "​", + "style": "IPY_MODEL_1507b1310f5045c9b691fdb102cc1686", + "value": "Loading checkpoint shards: 100%" + } + }, + "741d800130ea4830b9266f467fa6a0bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8a8e81f9d3a54ce49b367f8e984b4a06", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bab02b1f092b40c8983cd6440f7eaf16", + "value": 2 + } + }, + "73c0a01f1693471c9c017143e9e9058b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_94f30dc2653a4f178c9c2ef454d24644", + "placeholder": "​", + "style": "IPY_MODEL_a508625ef12d4a639fa9773484507709", + "value": " 2/2 [00:17<00:00,  8.07s/it]" + } + }, + "ab8174c1337b43048e05aeca72ca18ef": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5e5a992d86434e62a25fc9b7f75f4b16": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1507b1310f5045c9b691fdb102cc1686": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8a8e81f9d3a54ce49b367f8e984b4a06": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bab02b1f092b40c8983cd6440f7eaf16": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "94f30dc2653a4f178c9c2ef454d24644": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a508625ef12d4a639fa9773484507709": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "Pv8FH9BMgskk", + "outputId": "00cd7f02-2556-4850-b599-1ddec83f7cd9" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m112.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m20.2 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m109.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.1/76.1 MB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "google-genai 1.12.1 requires httpx<1.0.0,>=0.28.1, but you have httpx 0.27.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mRequirement already satisfied: bitsandbytes in /usr/local/lib/python3.11/dist-packages (0.45.5)\n", + "Requirement already satisfied: torch<3,>=2.0 in /usr/local/lib/python3.11/dist-packages (from bitsandbytes) (2.6.0+cu124)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from bitsandbytes) (2.0.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.18.0)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (4.13.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (2025.3.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in 
/usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already 
satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3,>=2.0->bitsandbytes) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3,>=2.0->bitsandbytes) (3.0.2)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n", + "Requirement already satisfied: accelerate in /usr/local/lib/python3.11/dist-packages (1.6.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.30.2)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from 
transformers) (4.67.1)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from accelerate) (5.9.5)\n", + "Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from accelerate) (2.6.0+cu124)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers) (2025.3.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers) (4.13.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.1.6)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (10.3.5.147)\n", + "Requirement already satisfied: 
nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (12.4.127)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->accelerate) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.0.0->accelerate) (1.3.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.4.26)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.0.0->accelerate) 
(3.0.2)\n" + ] + } + ], + "source": [ + "!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2\n", + "!pip install -U bitsandbytes\n", + "!pip install -U transformers accelerate" + ] + }, + { + "cell_type": "code", + "source": [ + "# imports\n", + "\n", + "import os\n", + "import requests\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "from google.colab import drive\n", + "from huggingface_hub import login\n", + "from google.colab import userdata\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, pipeline, TextGenerationPipeline\n", + "import torch" + ], + "metadata": { + "id": "u0qdj2ynjjRz" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Models\n", + "GPT = 'gpt2'\n", + "FALCON = \"tiiuae/falcon-rw-1b\"\n", + "MISTRAL = \"mistralai/Mistral-7B-Instruct-v0.1\"\n", + "Databricks = \"databricks/dolly-v2-3b\"\n" + ], + "metadata": { + "id": "a_sHgTj_jpDE" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Sign in to HuggingFace Hub\n", + "\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ], + "metadata": { + "id": "JYjtu3cPj2Th" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Flatten the messages into a single plain prompt\n", + "# prompt = \"\"\"\n", + "# Generate {{n}} fake job postings for a {{role}} position.\n", + "\n", + "# Only output a JSON array like:\n", + "# [\n", + "# {{\n", + "# \"title\": \"Software Engineer\",\n", + "# \"description\": \"Develop backend APIs and services.\",\n", + "# \"requirements\": [\"Python\", \"FastAPI\", \"MongoDB\"],\n", + "# \"location\": \"San Francisco\",\n", + "# \"company_name\": \"TechCorp\"\n", + "# }},\n", + "# ...\n", + "# ]\n", + "# Return valid JSON only. No markdown. 
No explanations.\n", + "# \"\"\"\n", + "\n", + "# prompt = \"\"\"\n", + "# Generate exactly {{n}} fake job postings for a {{role}}.\n", + "\n", + "# Each posting must be a JSON object with:\n", + "# - title\n", + "# - description (5-10 sentences)\n", + "# - requirements (array of 3-5 strings)\n", + "# - location\n", + "# - company_name\n", + "\n", + "# Return a single JSON array with {n} items. No explanations. No markdown.\n", + "# ONLY the JSON array as output.\n", + "# \"\"\"\n", + "\n", + "prompt = \"\"\"\n", + "Generate one fake job posting for a {{role}}.\n", + "\n", + "Return only a single JSON object with:\n", + "- title\n", + "- description (5-10 sentences)\n", + "- requirements (array of 4-6 strings)\n", + "- location\n", + "- company_name\n", + "\n", + "No explanations, no extra text.\n", + "Only the JSON object.\n", + "\"\"\"\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "7IUshG1fkQ7k" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!pip install safetensors" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-9nzEpDd-dkd", + "outputId": "484ed145-951f-4950-f9ba-bf7ed6e30a13" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: safetensors in /usr/local/lib/python3.11/dist-packages (0.5.3)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "os.makedirs(\"/tmp/dolly_offload\", exist_ok=True)" + ], + "metadata": { + "id": "D13qucmC-qGr" + }, + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "bnb_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")" + ], + "metadata": { + "id": "4qf967BtEqqx" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + 
"source": [ + "def load_model_and_tokenizer():\n", + " tokenizer = AutoTokenizer.from_pretrained(MISTRAL, trust_remote_code=True)\n", + "\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " MISTRAL,\n", + " device_map={\"\": \"cuda\"},\n", + " trust_remote_code=True,\n", + " offload_folder=\"/tmp/dolly_offload\",\n", + " quantization_config=bnb_config\n", + " )\n", + "\n", + " return model, tokenizer\n" + ], + "metadata": { + "id": "GjV7joEMjujM" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# generator = pipeline(\"text-generation\", model=Databricks, device_map=\"auto\", trust_remote_code=True, offload_folder=\"/tmp/dolly_offload\")\n", + "\n", + "def generate_job(role=\"Software Engineer\", model=None, tokenizer=None):\n", + " # prompt = prompt.format(role=role, n=n)\n", + " # outputs = generator(prompt, max_new_tokens=500, do_sample=True, temperature=0.9)\n", + " # return outputs[0]['generated_text']\n", + "\n", + " # Apply chat template formatting\n", + " # inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(model.device)\n", + " inputs = tokenizer(prompt.format(role=role), return_tensors=\"pt\")\n", + " inputs = {k: v.to(model.device) for k, v in inputs.items()}\n", + "\n", + "\n", + " # Generate output\n", + " outputs = model.generate(\n", + " **inputs,\n", + " max_new_tokens=600,\n", + " do_sample=True,\n", + " temperature=0.2,\n", + " top_p=0.9,\n", + " pad_token_id=tokenizer.eos_token_id\n", + " )\n", + "\n", + " # Decode and return\n", + " result = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + " return result\n", + "\n" + ], + "metadata": { + "id": "5w89B0MwkJWo" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "\n", + "def generate_jobs(role=\"Software Engineer\", n=5):\n", + " model, tokenizer = load_model_and_tokenizer()\n", + " role = \"Software Engineer\"\n", + " fake_jobs = []\n", + " for i in range(n):\n", + 
" fake_jobs.append(generate_job(role=role, model=model, tokenizer=tokenizer))\n", + " return fake_jobs" + ], + "metadata": { + "id": "ULhKrRe7XZmW" + }, + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(generate_jobs(role=\"Software Engineer\", n=10))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 406, + "referenced_widgets": [ + "1d1fe06ac632475086ed5964ed000360", + "c138f597c98c4944b54d36510ecc8e0b", + "bef2531516164e85bb79b86a791dd00d", + "1cb9fc011950479a8d4832bc52c3399c", + "974e8f7f05ef472d85d5ea71425e6c39", + "696090959af8499e9a38777e664b85c1", + "973bcc9740b4426da4c680d11f3c1f7e", + "3cb5d8fdb5fb4b6a99f6733c00df8378", + "58f4369c68434d569d5eb1bc36e71775", + "a05df972876941e3b6faab56cc30a4b8", + "9c61d90b63dd4fb5a481282d6d6eb8e8", + "2b71f87a02a540488a9e07f072f8807a", + "548cd7e9fab54470bc52810f27784760", + "9c5eb078ece84a57aa9c402c9cad3b0b", + "ee00a9f599db4affabb7bf1c4df6ca1a", + "52bd638607bf4e1aaf224ebdcfa3693d", + "771619a5acd343c788b8189167af09d4", + "09a1b30b5659452f95ebb2e72466c750", + "145a1f1032a44079a262db381e60d401", + "99888ad83b51485f959f977ba4418119", + "ec0854c2ea9a4c9280b6876df365db9d", + "dac5892c85214f69a5d75d5dc4858dfe", + "41b669da565e4204b848b754dfa28ac8", + "e806afdada48418c9e353b94a38cd703", + "7898b7322b014e96984c3d09a29a57fb", + "d665270b05d64effba568ded85eee1b4", + "df087de9ade24058b1cf32e1556f7cb6", + "584330ab439b4887b1050a7f14dc5d7c", + "880b32d3bd1d4af8b5d0b449aab87e8b", + "97d09f016e274cca93927f3bd8329352", + "d87ef5878c0f4211809716674d0d8413", + "556109848b1c4ebc99a6cc7c0be519e0", + "8d6cdfd75e3f4a628c9e785d3c469d98", + "fb1ff6f4482143c39be1cca57ec2fc8b", + "83e6421843ad487c91bc75510b90f198", + "9e74a7b74e1a4b119af5b95d572bac3c", + "080c34ad56c84c229b1555b15b354aad", + "d968bf43e8574d9090326b31c9a7fd93", + "e78b05f33ee54c968fd87b77a2470bce", + "79a201f7ab7e49efa9e3e1504012dec2", + "6e5d431074de4955a97d4ea36621ae36", + 
"bfc581362fbc4aca85df7b2a943dd5e4", + "bc9b585bfd2847bb9f22c4720bd19033", + "8addd2418c3049f3be32465cc9a408d4", + "c7b5bb9ef22f4ebe9969d4d10d63d24c", + "d8c3f3ec329743f6b2f21d21601f092a", + "2fee19152ef34eeaba541d559b9a0bc0", + "2740de6be1ae4e3bacc642c39828883b", + "4104813265f34db0ab09c9d6c148ba29", + "6d2dbad5a0984f8382abd18910c14343", + "32285185818f40a6b07c6d6f6175b70c", + "79da3c26e0fb4405a198c2255df9ec00", + "c95bea4e04ff49078821a5dd67f0c28a", + "3695b9dde85348efb683e31e5d52e210", + "1d982bed2d4645b8a19295b7812cef49", + "32c58f50bb1c44e085ae3663004fcfff", + "c4df70cf509541828d3a06c380fdfe3d", + "abd2737f597f48b0846a74c743307917", + "a2a52b5e3c104e1cbec513a9f8744db2", + "ba57460b8ee24f4e96f8a603914b7073", + "d17cd0e49fa94361894660c0645ec9a8", + "6cd364a43f6f4ea793b05bf14ee9d687", + "a80f72a5e41047f1898d5b6f00a2c69b", + "c6f6fca0f35b44fbb9037337a5bc0431", + "3d07e648a5644742b8112146e952c44a", + "bff978fcc6f94f55bf605c6d9c23cfd2", + "eca24e648bcf4cc684f15da684e2791d", + "dc82b611b8c145eb8ebc7b80073e9ae1", + "f3e6040a241c4ac7b715bb07a9ec6d6b", + "e310ab9f4338443e82d257ddc21f48bb", + "9dd0e53a7a2a4d668c5640d938b71c9f", + "1fc933b90fa546c884181136373ad005", + "94f3ee73e2c04092ac5522c6ef038ea1", + "81d8563026e04f5ab00eced0da89a7ef", + "95f081aaf9e84c2f91c82a4e2f183009", + "965cfc093b5040bbaec177820e45ec95", + "d328397d81f343e28dd1a6e52c5f0ae7", + "f73f9c7f341c4a99b00585343bf4d4bd", + "2a11e010825b42d4a949ad64ae0d1933", + "15b769156f6a4d2988f1c09f3820f7ef", + "a0484e3846c647b892d2de3797496605", + "cb042f80aaf04bf1963d637d1771741e", + "852ba7d4221a475488411f5014362496", + "38dc7c1e65324e3097d8738532272e32", + "613da14abc24460db3bb337886cb407c", + "37a495a5836f413ea5f662538d51a939", + "9c61322c006f465385df301121462e82", + "d93d0bb6ebc943a1be6902bd88cef441", + "50a2a1bd13db4045a4ae01138470c42b", + "ad7cba643d1742cdb47c433bf50072f9", + "57ef5d067e7343239525a6da237b29eb", + "7567388a58a340d4a0f384f79ee13ddc", + "52c2896ab41a4d2592484084cb501e5a", + 
"22957622a42345b991371153c29583c4", + "d34c879607b041739a2cc6273509e330", + "d1e7bdd4faac4765862fc809017c4856", + "fcb0ad846398455faccf0d797549f589", + "b381226552c9462d858051fcb7240727", + "94e630795bc247e08e6af434c5924cdd", + "2b496c218e2049ff9156ff5b3bbdb90b", + "62d3b35a3924417894094d3bbf993932", + "41737448e98a48dcbe117351645395de", + "e83735cd79674a3482f0b90d4c9a3e3d", + "eff6ca539e2947e9b2987977f143de9a", + "aa75292545a649eda8cb7bab0ac9bbcd", + "22c0e2213505435eaeebdfe330b8fbb8", + "7de820edeeaf4210af68c721bab3082d", + "66ef664b717343bdaf8e5c4610b2a678", + "f09534cbda8c4e91b2e073c0eca0cb96", + "7d8b5a2a52aa4957bc5905021898d8f4", + "1c68a822580a4960acad93be9fd48ce3", + "6df81f91b17f41dc91fc9f367fa0afab", + "17742936c9ac46e588d1ce42235745d0", + "17f0cd6f05184164b48ef906f192505a", + "936a67f2de2e44728b83600f4fa0569c", + "d5ad82f6b9654a8cb888613caaaaa097", + "b014979e237344129545ff2c384c1c1c", + "b99c12d57d4a4eab84aefbef58452c32", + "5923bbdcf6334393ad832765f129bdec", + "260ac8c28531450bba1deac4e4669dc4", + "067959a4ef614c498c28bb83c10e16de" + ] + }, + "id": "kKsErltXXwy1", + "outputId": "683c2e5e-16d8-4fe3-efdd-664c385c71e7" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/2.10k [00:00=2.0 in /usr/local/lib/python3.11/dist-packages (from bitsandbytes) (2.6.0+cu124)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from bitsandbytes) (2.0.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.18.0)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (4.13.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.4.2)\n", + "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (2025.3.2)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.5.8)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (11.2.1.3)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (10.3.5.147)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (11.6.1.9)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.3.1.170)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (2.21.5)\n", + "Requirement already satisfied: 
nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (12.4.127)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3,>=2.0->bitsandbytes) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3,>=2.0->bitsandbytes) (1.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3,>=2.0->bitsandbytes) (3.0.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import re\n", + "import json\n", + "import ast\n", + "\n", + "\n", + "\n", + "def extract_json_objects_from_text_block(texts):\n", + " \"\"\"\n", + " Accepts either a single string or a list of strings.\n", + " Extracts all valid JSON objects from messy text blocks.\n", + " \"\"\"\n", + " if isinstance(texts, str):\n", + " texts = [texts] # wrap in list if single string\n", + "\n", + " pattern = r\"\\{[\\s\\S]*?\\}\"\n", + " results = []\n", + "\n", + " for raw_text in texts:\n", + " matches = re.findall(pattern, raw_text)\n", + " for match in matches:\n", + " try:\n", + " obj = json.loads(match)\n", + " results.append(obj)\n", + " except json.JSONDecodeError:\n", + " continue\n", + "\n", + " return results\n", + "\n", + "text = generate_jobs(role=\"Software Engineer\", n=10)\n", + "print(extract_json_objects_from_text_block(text))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86, + "referenced_widgets": [ + "b5dd409cf6e04764adbb7c2a49b7be86", + "65187b4ebb2041b39778268e8b4d6b0d", + "33317cac10ca4a98bf4433c1eff43435", + 
"f81f5402902c4c04b10895782287e908", + "c471914fe0d34ae8967bac2820637d5b", + "7aead6f6cffa40a383f1b8c64943329e", + "f24fe57d8e164fd68185b4c117e7c097", + "f913ca9ab6d44ab1b788a36bd964ed39", + "ed34016801264a05bb3697eca2ac22ef", + "fe622254072540fda3b0dd6b2cab6e4a", + "5d95bdea47594e21855a6e564d0760da" + ] + }, + "id": "1uzTM2G1oqDs", + "outputId": "08e88ab0-ca17-46d3-8f9c-6a595863aeba" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00=22.0 (from gradio)\n", + " Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.9.0)\n", + "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", + " Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)\n", + "Collecting ffmpy (from gradio)\n", + " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting gradio-client==1.10.0 (from gradio)\n", + " Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting groovy~=0.1 (from gradio)\n", + " Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.27.2)\n", + "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.30.2)\n", + "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n", + "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.0.2)\n", + "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.0.2)\n", + "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.10.17)\n", + "Requirement already satisfied: packaging in 
/usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n", + "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n", + "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (11.2.1)\n", + "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.11.3)\n", + "Collecting pydub (from gradio)\n", + " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting python-multipart>=0.0.18 (from gradio)\n", + " Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n", + "Collecting ruff>=0.9.3 (from gradio)\n", + " Downloading ruff-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", + "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", + " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting semantic-version~=2.0 (from gradio)\n", + " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", + "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", + " Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", + " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.15.3)\n", + "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.13.2)\n", + "Collecting uvicorn>=0.14.0 (from gradio)\n", + " Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.0->gradio) (2025.3.2)\n", + "Requirement already satisfied: 
websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.0->gradio) (15.0.1)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.4.26)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.9)\n", + "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.16.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (3.18.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.28.1->gradio) (4.67.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (2.33.1)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in 
/usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.4.0)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.8)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.28.1->gradio) (3.4.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.28.1->gradio) (2.4.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", + "Downloading gradio-5.29.0-py3-none-any.whl (54.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.1/54.1 MB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gradio_client-1.10.0-py3-none-any.whl (322 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.9/322.9 kB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiofiles-24.1.0-py3-none-any.whl 
(15 kB)\n", + "Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading groovy-0.1.2-py3-none-any.whl (14 kB)\n", + "Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", + "Downloading ruff-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.5/11.5 MB\u001b[0m \u001b[31m131.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", + "Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", + "Downloading starlette-0.46.2-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", + "Downloading uvicorn-0.34.2-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", + "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", + "Installing collected packages: pydub, uvicorn, tomlkit, semantic-version, ruff, python-multipart, groovy, ffmpy, aiofiles, starlette, safehttpx, gradio-client, fastapi, gradio\n", + "Successfully installed aiofiles-24.1.0 fastapi-0.115.12 ffmpy-0.5.0 gradio-5.29.0 gradio-client-1.10.0 groovy-0.1.2 pydub-0.25.1 python-multipart-0.0.20 ruff-0.11.8 safehttpx-0.1.6 semantic-version-2.10.0 starlette-0.46.2 tomlkit-0.13.2 uvicorn-0.34.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import gradio as gr\n", + "import json\n", + "from 
transformers import AutoTokenizer, AutoModelForCausalLM\n", + "import torch\n", + "import re\n", + "\n", + "def generate_ui(role, n):\n", + " try:\n", + " raw_jobs = generate_jobs(role, n)\n", + " parsed_jobs = extract_json_objects_from_text_block(raw_jobs)\n", + "\n", + " if not isinstance(parsed_jobs, list) or not all(isinstance(item, dict) for item in parsed_jobs):\n", + " print(\"[ERROR] Parsed result is not a list of dicts\")\n", + " return gr.update(value=[], visible=True), None\n", + "\n", + " filename = f\"{role.replace(' ', '_').lower()}_jobs.json\"\n", + " with open(filename, \"w\") as f:\n", + " json.dump(parsed_jobs, f, indent=2)\n", + "\n", + " print(f\"[INFO] Returning {len(parsed_jobs)} jobs -> {filename}\")\n", + " return parsed_jobs, filename\n", + "\n", + " except Exception as e:\n", + " print(f\"[FATAL ERROR] {e}\")\n", + " return gr.update(value=[], visible=True), None\n", + "\n", + "if __name__ == \"__main__\":\n", + " with gr.Blocks() as demo:\n", + " gr.Markdown(\"# 🧠 Synthetic Job Dataset Generator\")\n", + " gr.Markdown(\"Generate a structured dataset of job postings for a specific role.\")\n", + "\n", + " with gr.Row():\n", + " role_input = gr.Textbox(label=\"Job Role\", placeholder=\"e.g. 
Software Engineer\", value=\"Software Engineer\")\n", + " n_input = gr.Number(label=\"Number of Samples\", value=5, precision=0)\n", + "\n", + " generate_button = gr.Button(\"🚀 Generate\")\n", + " output_table = gr.JSON(label=\"Generated Dataset\")\n", + " download_button = gr.File(label=\"Download JSON\")\n", + "\n", + " generate_button.click(\n", + " generate_ui,\n", + " inputs=[role_input, n_input],\n", + " outputs=[output_table, download_button]\n", + " )\n", + "\n", + " demo.launch(debug=True)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 730, + "referenced_widgets": [ + "9f9defc39ac5437e9512e5fad810b409", + "1c126dfdc51c438b9b48c8a65e549ae2", + "741d800130ea4830b9266f467fa6a0bf", + "73c0a01f1693471c9c017143e9e9058b", + "ab8174c1337b43048e05aeca72ca18ef", + "5e5a992d86434e62a25fc9b7f75f4b16", + "1507b1310f5045c9b691fdb102cc1686", + "8a8e81f9d3a54ce49b367f8e984b4a06", + "bab02b1f092b40c8983cd6440f7eaf16", + "94f30dc2653a4f178c9c2ef454d24644", + "a508625ef12d4a639fa9773484507709" + ] + }, + "id": "FEByigZTo5cv", + "outputId": "e452754b-e155-4b57-eced-7af37996f1f0" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n", + "\n", + "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n", + "* Running on public URL: https://bf27145eb99f8caadd.gradio.live\n", + "\n", + "This share link expires in 1 week. 
For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00 software_engineer_jobs.json\n", + "Keyboard interruption in main thread... closing server.\n", + "Killing tunnel 127.0.0.1:7860 <> https://bf27145eb99f8caadd.gradio.live\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "# Get list of all .ipynb files in /content\n", + "notebooks = [f for f in os.listdir(\"/content\") if f.endswith(\".ipynb\")]\n", + "print(notebooks)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZfPQJw0Z5UD9", + "outputId": "0e4ba82b-e23b-4faa-8b29-eaf87fdf9500" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Y88jqI_u5WEL" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/week4/community-contributions/Week4-Comments-Generator-DP.ipynb b/week4/community-contributions/Week4-Comments-Generator-DP.ipynb new file mode 100644 index 0000000..6b3b698 --- /dev/null +++ b/week4/community-contributions/Week4-Comments-Generator-DP.ipynb @@ -0,0 +1,400 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3e473bbd-a0c2-43bd-bf99-c749784d00c3", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "import openai\n", + "import anthropic\n", + "import google.generativeai as genai\n", + "import requests\n", + "import json\n", + "import os\n", + "from typing import Dict, Any, Optional\n", + "import asyncio\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16210512-41f1-4de3-8348-2cd7129e023f", + "metadata": {}, + "outputs": [], + "source": [ + "# load API\n", + "load_dotenv(override=True)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "6747e275-91eb-4d2b-90b6-805f2bd9b6b7", + "metadata": {}, + "outputs": [], + "source": [ + "class CodeCommenter:\n", + " def __init__(self):\n", + " # Initialize API clients\n", + " self.openai_client = None\n", + " self.anthropic_client = None\n", + " self.gemini_client = None\n", + " \n", + " # Load API keys from environment variables\n", + " self.setup_clients()\n", + " \n", + " def setup_clients(self):\n", + " \"\"\"Initialize API clients with keys from environment variables\"\"\"\n", + " try:\n", + " # OpenAI\n", + " openai_key = os.getenv('OPENAI_API_KEY')\n", + " if openai_key:\n", + " self.openai_client = openai.OpenAI(api_key=openai_key)\n", + " \n", + " # Anthropic\n", + " anthropic_key = os.getenv('ANTHROPIC_API_KEY')\n", + " if anthropic_key:\n", + " self.anthropic_client = anthropic.Anthropic(api_key=anthropic_key)\n", + " \n", + " # Google Gemini\n", + " gemini_key = os.getenv('GOOGLE_API_KEY')\n", + " if gemini_key:\n", + " genai.configure(api_key=gemini_key)\n", + " self.gemini_client = genai.GenerativeModel('gemini-2.0-flash-exp')\n", + " \n", + " except Exception as e:\n", + " print(f\"Warning: Error setting up API clients: {e}\")\n", + " \n", + " def create_prompt(self, code: str, language: str) -> str:\n", + " \"\"\"Create a prompt for the LLM to add comments and docstrings\"\"\"\n", + " return f\"\"\"Please add detailed and helpful comments and docstrings to the following {language} code. \n", + " \n", + "Guidelines:\n", + "1. Add comprehensive docstrings for functions, classes, and modules\n", + "2. Add inline comments explaining complex logic\n", + "3. Follow the commenting conventions for {language}\n", + "4. Maintain the original code structure and functionality\n", + "5. Make comments clear and professional\n", + "6. Don't change the actual code logic, only add comments\n", + "7. 
Do not add code markdown delimiters like ```python\n", + "\n", + "Here's the code to comment:\n", + "\n", + "{code}\n", + "\n", + "Please return only the commented code without any additional explanation or markdown formatting.\"\"\"\n", + "\n", + " def call_openai(self, prompt: str, model: str = \"gpt-4o-mini\") -> str:\n", + " \"\"\"Make API call to OpenAI\"\"\"\n", + " if not self.openai_client:\n", + " return \"Error: OpenAI API key not configured. Please set OPENAI_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.openai_client.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful coding assistant that adds detailed comments and docstrings to code.\"},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ],\n", + " max_tokens=4000,\n", + " temperature=0.1\n", + " )\n", + " return response.choices[0].message.content.strip()\n", + " except Exception as e:\n", + " return f\"Error calling OpenAI API: {str(e)}\"\n", + " \n", + " def call_anthropic(self, prompt: str, model: str = \"claude-3-5-haiku-20241022\") -> str:\n", + " \"\"\"Make API call to Anthropic Claude\"\"\"\n", + " if not self.anthropic_client:\n", + " return \"Error: Anthropic API key not configured. Please set ANTHROPIC_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.anthropic_client.messages.create(\n", + " model=model,\n", + " max_tokens=4000,\n", + " temperature=0.1,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " )\n", + " return response.content[0].text.strip()\n", + " except Exception as e:\n", + " return f\"Error calling Anthropic API: {str(e)}\"\n", + " \n", + " def call_gemini(self, prompt: str) -> str:\n", + " \"\"\"Make API call to Google Gemini\"\"\"\n", + " if not self.gemini_client:\n", + " return \"Error: Google API key not configured. 
Please set GOOGLE_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.gemini_client.generate_content(\n", + " prompt,\n", + " generation_config=genai.types.GenerationConfig(\n", + " max_output_tokens=4000,\n", + " temperature=0.1,\n", + " )\n", + " )\n", + " return response.text.strip()\n", + " except Exception as e:\n", + " return f\"Error calling Gemini API: {str(e)}\"\n", + " \n", + " def call_ollama(self, prompt: str, model: str = \"llama3.2:latest\") -> str:\n", + " \"\"\"Make API call to Ollama (local)\"\"\"\n", + " try:\n", + " url = \"http://localhost:11434/api/generate\"\n", + " data = {\n", + " \"model\": model,\n", + " \"prompt\": prompt,\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": 0.1,\n", + " \"num_predict\": 4000\n", + " }\n", + " }\n", + " \n", + " response = requests.post(url, json=data, timeout=60)\n", + " if response.status_code == 200:\n", + " result = response.json()\n", + " return result.get('response', '').strip()\n", + " else:\n", + " return f\"Error calling Ollama API: HTTP {response.status_code}\"\n", + " except requests.exceptions.ConnectionError:\n", + " return \"Error: Could not connect to Ollama. 
Make sure Ollama is running locally on port 11434.\"\n", + " except Exception as e:\n", + " return f\"Error calling Ollama API: {str(e)}\"\n", + "\n", + " def generate_comments(self, language: str, code: str, llm: str) -> str:\n", + " \"\"\"Generate comments for the given code using the specified LLM\"\"\"\n", + " if not code.strip():\n", + " return \"Error: Please provide code to comment.\"\n", + " \n", + " prompt = self.create_prompt(code, language)\n", + " \n", + " # Route to appropriate LLM\n", + " if llm == \"gpt-4o-mini\":\n", + " return self.call_openai(prompt, \"gpt-4o-mini\")\n", + " elif llm == \"claude-3-5-haiku-20241022\":\n", + " return self.call_anthropic(prompt, \"claude-3-5-haiku-20241022\")\n", + " elif llm == \"gemini-2.0-flash\":\n", + " return self.call_gemini(prompt)\n", + " elif llm == \"ollama:llama3.2:latest\":\n", + " return self.call_ollama(prompt, \"llama3.2:latest\")\n", + " else:\n", + " return f\"Error: Unsupported LLM: {llm}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "813f0911-d53f-4887-9341-656712e32d8f", + "metadata": {}, + "outputs": [], + "source": [ + "def create_gradio_interface():\n", + " \"\"\"Create and configure the Gradio interface\"\"\"\n", + " commenter = CodeCommenter()\n", + " \n", + " # Define the main function for the interface\n", + " def process_code(language, code, llm):\n", + " \"\"\"Process the code and return commented version\"\"\"\n", + " if not code.strip():\n", + " return \"Please enter some code to comment.\"\n", + " \n", + " # Show processing message\n", + " processing_msg = f\"Processing {language} code with {llm}...\"\n", + " print(processing_msg)\n", + " \n", + " # Generate comments\n", + " result = commenter.generate_comments(language, code, llm)\n", + " return result\n", + " \n", + " # Define default code\n", + " default_code = \"\"\"import pyodbc\n", + "from tabulate import tabulate\n", + "def connect_to_sql_server(server_name, database, username=None, 
password=None):\n", + " try:\n", + " if username and password:\n", + " connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};UID={username};PWD={password}\"\n", + " else:\n", + " connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};Trusted_Connection=yes\"\n", + " connection = pyodbc.connect(connection_string)\n", + " print(f\"Successfully connected to {server_name}/{database}\")\n", + " return connection\n", + " except Exception as e:\n", + " print(f\"Failed to connect to {server_name}/{database}: {str(e)}\")\n", + " return None\n", + "def get_record_count(connection, table_name):\n", + " try:\n", + " cursor = connection.cursor()\n", + " query = f\"SELECT COUNT(*) FROM {table_name}\"\n", + " cursor.execute(query)\n", + " count = cursor.fetchone()[0]\n", + " cursor.close()\n", + " print(f\"Record count for {table_name}: {count}\")\n", + " return count\n", + " except Exception as e:\n", + " print(f\"Failed to get record count for {table_name}: {str(e)}\")\n", + " return None\n", + "def select_top_records(connection, table_name, n):\n", + " try:\n", + " cursor = connection.cursor()\n", + " query = f\"SELECT TOP {n} * FROM {table_name}\"\n", + " cursor.execute(query)\n", + " records = cursor.fetchall()\n", + " columns = [column[0] for column in cursor.description]\n", + " cursor.close()\n", + " print(f\"Top {n} records from {table_name}\")\n", + " if records:\n", + " print(tabulate(records, headers=columns, tablefmt=\"grid\"))\n", + " return records\n", + " except Exception as e:\n", + " print(f\"Failed to retrieve top {n} records from {table_name}: {str(e)}\")\n", + " return None\n", + "conn = connect_to_sql_server(\"localhost\", \"AdventureWorks_lite\")\n", + "if conn:\n", + " total_records = get_record_count(conn, \"Sales.SalesOrderDetail\")\n", + " top_records = select_top_records(conn, \"Production.Product\", 10)\n", + " conn.close()\n", + " print(\"Connection 
closed successfully\")\"\"\"\n", + "\n", + " css = \"\"\"\n", + "textarea[rows]:not([rows=\"1\"]) {\n", + " overflow-y: auto !important;\n", + " scrollbar-width: thin !important;\n", + "}\n", + "textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar {\n", + " all: initial !important;\n", + " background: #f1f1f1 !important;\n", + "}\n", + "textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar-thumb {\n", + " all: initial !important;\n", + " background: #a8a8a8 !important;\n", + "}\n", + "\"\"\"\n", + "\n", + " # Create the interface\n", + " with gr.Blocks(title=\"Code Commenter\", theme=gr.themes.Base(), css=css) as interface:\n", + " gr.Markdown(\"# 🔧 Code Commenter\")\n", + " gr.Markdown(\"Add detailed comments and docstrings to your code using various LLM models.\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column():\n", + " code_input = gr.Textbox(\n", + " label=\"Input Code\",\n", + " value=default_code,\n", + " lines=15,\n", + " max_lines=20,\n", + " info=\"Enter the code you want to add comments to\"\n", + " )\n", + " \n", + " with gr.Column():\n", + " code_output = gr.Textbox(\n", + " label=\"Commented Code\",\n", + " lines=20,\n", + " max_lines=20,\n", + " info=\"Your code with added comments and docstrings\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " language_dropdown = gr.Dropdown(\n", + " choices=[\"Python\", \"Ruby\", \"Rust\", \"C++\", \"Java\"],\n", + " value=\"Python\",\n", + " label=\"Programming Language\",\n", + " info=\"Select the programming language of your code\"\n", + " )\n", + " \n", + " llm_dropdown = gr.Dropdown(\n", + " choices=[\n", + " \"gpt-4o-mini\",\n", + " \"claude-3-5-haiku-20241022\", \n", + " \"gemini-2.0-flash\",\n", + " \"ollama:llama3.2:latest\"\n", + " ],\n", + " value=\"gpt-4o-mini\",\n", + " label=\"LLM Model\",\n", + " info=\"Choose the language model to use\"\n", + " )\n", + " \n", + " generate_btn = gr.Button(\n", + " \"🚀 Generate Comments\", \n", + " variant=\"primary\",\n", + " 
size=\"lg\"\n", + " )\n", + " \n", + " # Add some API setup information\n", + " gr.Markdown(\"## 📝 API Setup Instructions\")\n", + " gr.Markdown(\"\"\"\n", + " To use this tool, you need to set up API keys as environment variables:\n", + " \n", + " - **OpenAI**: Set `OPENAI_API_KEY`\n", + " - **Anthropic**: Set `ANTHROPIC_API_KEY` \n", + " - **Google Gemini**: Set `GOOGLE_API_KEY`\n", + " - **Ollama**: Make sure Ollama is running locally on port 11434\n", + " \"\"\")\n", + " \n", + " # Connect the button to the processing function\n", + " generate_btn.click(\n", + " fn=process_code,\n", + " inputs=[language_dropdown, code_input, llm_dropdown],\n", + " outputs=code_output,\n", + " show_progress=True\n", + " )\n", + " \n", + " return interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef461e08-c1d5-406d-b7d2-a4329f16486e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"🚀 Starting Code Commenter...\")\n", + "print(\"📋 Setting up Gradio interface...\")\n", + "\n", + "# Create and launch the interface\n", + "interface = create_gradio_interface()\n", + "\n", + "print(\"🌐 Launching interface...\")\n", + "print(\"💡 The interface will open in your default browser\")\n", + "print(\"🔧 Make sure to set up your API keys as environment variables\")\n", + "\n", + "# Launch with auto-opening in browser\n", + "interface.launch(\n", + " server_name=\"127.0.0.1\",\n", + " server_port=7860,\n", + " share=False,\n", + " inbrowser=True,\n", + " show_error=True\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/week4/community-contributions/Week4_generate_comments_and_tests-DP.ipynb b/week4/community-contributions/Week4_generate_comments_and_tests-DP.ipynb new file mode 100644 index 0000000..09efe1d --- /dev/null +++ b/week4/community-contributions/Week4_generate_comments_and_tests-DP.ipynb @@ -0,0 +1,538 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3e473bbd-a0c2-43bd-bf99-c749784d00c3", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "import openai\n", + "import anthropic\n", + "import google.generativeai as genai\n", + "import requests\n", + "import json\n", + "import os\n", + "from typing import Dict, Any, Optional\n", + "import asyncio\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "16210512-41f1-4de3-8348-2cd7129e023f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# load API\n", + "load_dotenv(override=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6747e275-91eb-4d2b-90b6-805f2bd9b6b7", + "metadata": {}, + "outputs": [], + "source": [ + "class CodeCommenter:\n", + " def __init__(self):\n", + " # Initialize API clients\n", + " self.openai_client = None\n", + " self.anthropic_client = None\n", + " self.gemini_client = None\n", + " \n", + " # Load API keys from environment variables\n", + " self.setup_clients()\n", + " \n", + " def setup_clients(self):\n", + " \"\"\"Initialize API clients with keys from environment variables\"\"\"\n", + " try:\n", + " # OpenAI\n", + " openai_key = os.getenv('OPENAI_API_KEY')\n", + " if openai_key:\n", + " self.openai_client = openai.OpenAI(api_key=openai_key)\n", + " \n", + " # Anthropic\n", + " anthropic_key = os.getenv('ANTHROPIC_API_KEY')\n", + " if anthropic_key:\n", + " self.anthropic_client = anthropic.Anthropic(api_key=anthropic_key)\n", 
+ " \n", + " # Google Gemini\n", + " gemini_key = os.getenv('GOOGLE_API_KEY')\n", + " if gemini_key:\n", + " genai.configure(api_key=gemini_key)\n", + " self.gemini_client = genai.GenerativeModel('gemini-2.0-flash-exp')\n", + " \n", + " except Exception as e:\n", + " print(f\"Warning: Error setting up API clients: {e}\")\n", + " \n", + " def create_comments_prompt(self, code: str, language: str) -> str:\n", + " \"\"\"Create a prompt for the LLM to add comments and docstrings\"\"\"\n", + " return f\"\"\"Please add detailed and helpful comments and docstrings to the following {language} code. \n", + " \n", + "Guidelines:\n", + "1. Add comprehensive docstrings for functions, classes, and modules\n", + "2. Add inline comments explaining complex logic\n", + "3. Follow the commenting conventions for {language}\n", + "4. Maintain the original code structure and functionality\n", + "5. Make comments clear and professional\n", + "6. Don't change the actual code logic, only add comments\n", + "7. Do not add code markdown delimiters like ```python\n", + "\n", + "Here's the code to comment:\n", + "\n", + "{code}\n", + "\n", + "Please return only the commented code without any additional explanation or markdown formatting.\"\"\"\n", + "\n", + " def create_tests_prompt(self, code: str, language: str) -> str:\n", + " \"\"\"Create a prompt for the LLM to generate unit tests\"\"\"\n", + " return f\"\"\"Please generate comprehensive unit tests for the following {language} code.\n", + " \n", + "Guidelines:\n", + "1. Use appropriate testing framework for {language} (pytest for Python, JUnit for Java, etc.)\n", + "2. Create tests for all functions and methods\n", + "3. Include both positive and negative test cases\n", + "4. Test edge cases and error conditions\n", + "5. Use meaningful test names that describe what is being tested\n", + "6. Include setup and teardown methods if needed\n", + "7. Add mock objects for external dependencies (like database connections)\n", + "8. 
Do not add code markdown delimiters like ```python\n", + "9. Follow testing best practices for {language}\n", + "\n", + "Here's the code to test:\n", + "\n", + "{code}\n", + "\n", + "Please return only the unit test code without any additional explanation or markdown formatting.\"\"\"\n", + "\n", + " def create_combined_prompt(self, code: str, language: str) -> str:\n", + " \"\"\"Create a prompt for the LLM to add both comments and unit tests\"\"\"\n", + " return f\"\"\"Please add detailed comments and docstrings to the following {language} code AND generate comprehensive unit tests for it.\n", + " \n", + "For Comments:\n", + "1. Add comprehensive docstrings for functions, classes, and modules\n", + "2. Add inline comments explaining complex logic\n", + "3. Follow the commenting conventions for {language}\n", + "4. Don't change the actual code logic, only add comments\n", + "\n", + "For Unit Tests:\n", + "1. Use appropriate testing framework for {language} (pytest for Python, JUnit for Java, etc.)\n", + "2. Create tests for all functions and methods\n", + "3. Include both positive and negative test cases\n", + "4. Test edge cases and error conditions\n", + "5. Add mock objects for external dependencies (like database connections)\n", + "6. Follow testing best practices for {language}\n", + "\n", + "Structure your response as:\n", + "1. First, provide the original code with added comments and docstrings \n", + "2. Then, provide the unit tests as a separate section\n", + "3. Do not add code markdown delimiters like ```python\n", + "4. 
The 2 separated portions of code, comments and unit test should be clearly demarcated by comments specifying the following section purpose\n", + "\n", + "Here's the code:\n", + "\n", + "{code}\n", + "\n", + "Please return the commented code followed by the unit tests, clearly separated.\"\"\"\n", + "\n", + " def call_openai(self, prompt: str, model: str = \"gpt-4o-mini\") -> str:\n", + " \"\"\"Make API call to OpenAI\"\"\"\n", + " if not self.openai_client:\n", + " return \"Error: OpenAI API key not configured. Please set OPENAI_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.openai_client.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful coding assistant that adds detailed comments, docstrings, and generates unit tests for code.\"},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ],\n", + " max_tokens=4000,\n", + " temperature=0.1\n", + " )\n", + " return response.choices[0].message.content.strip()\n", + " except Exception as e:\n", + " return f\"Error calling OpenAI API: {str(e)}\"\n", + " \n", + " def call_anthropic(self, prompt: str, model: str = \"claude-3-5-haiku-20241022\") -> str:\n", + " \"\"\"Make API call to Anthropic Claude\"\"\"\n", + " if not self.anthropic_client:\n", + " return \"Error: Anthropic API key not configured. 
Please set ANTHROPIC_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.anthropic_client.messages.create(\n", + " model=model,\n", + " max_tokens=4000,\n", + " temperature=0.1,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " )\n", + " return response.content[0].text.strip()\n", + " except Exception as e:\n", + " return f\"Error calling Anthropic API: {str(e)}\"\n", + " \n", + " def call_gemini(self, prompt: str) -> str:\n", + " \"\"\"Make API call to Google Gemini\"\"\"\n", + " if not self.gemini_client:\n", + " return \"Error: Google API key not configured. Please set GOOGLE_API_KEY environment variable.\"\n", + " \n", + " try:\n", + " response = self.gemini_client.generate_content(\n", + " prompt,\n", + " generation_config=genai.types.GenerationConfig(\n", + " max_output_tokens=4000,\n", + " temperature=0.1,\n", + " )\n", + " )\n", + " return response.text.strip()\n", + " except Exception as e:\n", + " return f\"Error calling Gemini API: {str(e)}\"\n", + " \n", + " def call_ollama(self, prompt: str, model: str = \"llama3.2:latest\") -> str:\n", + " \"\"\"Make API call to Ollama (local)\"\"\"\n", + " try:\n", + " url = \"http://localhost:11434/api/generate\"\n", + " data = {\n", + " \"model\": model,\n", + " \"prompt\": prompt,\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": 0.1,\n", + " \"num_predict\": 4000\n", + " }\n", + " }\n", + " \n", + " response = requests.post(url, json=data, timeout=60)\n", + " if response.status_code == 200:\n", + " result = response.json()\n", + " return result.get('response', '').strip()\n", + " else:\n", + " return f\"Error calling Ollama API: HTTP {response.status_code}\"\n", + " except requests.exceptions.ConnectionError:\n", + " return \"Error: Could not connect to Ollama. 
Make sure Ollama is running locally on port 11434.\"\n", + " except Exception as e:\n", + " return f\"Error calling Ollama API: {str(e)}\"\n", + "\n", + " def process_code(self, language: str, code: str, llm: str, generate_comments: bool, generate_tests: bool) -> str:\n", + " \"\"\"Process the given code based on selected options\"\"\"\n", + " if not code.strip():\n", + " return \"Error: Please provide code to process.\"\n", + " \n", + " if not generate_comments and not generate_tests:\n", + " return \"Error: Please select at least one option (Generate comments or Generate test units).\"\n", + " \n", + " # Determine which prompt to use\n", + " if generate_comments and generate_tests:\n", + " prompt = self.create_combined_prompt(code, language)\n", + " elif generate_comments:\n", + " prompt = self.create_comments_prompt(code, language)\n", + " else: # generate_tests only\n", + " prompt = self.create_tests_prompt(code, language)\n", + " \n", + " # Route to appropriate LLM\n", + " if llm == \"gpt-4o-mini\":\n", + " return self.call_openai(prompt, \"gpt-4o-mini\")\n", + " elif llm == \"claude-3-5-haiku-20241022\":\n", + " return self.call_anthropic(prompt, \"claude-3-5-haiku-20241022\")\n", + " elif llm == \"gemini-2.0-flash\":\n", + " return self.call_gemini(prompt)\n", + " elif llm == \"ollama:llama3.2:latest\":\n", + " return self.call_ollama(prompt, \"llama3.2:latest\")\n", + " else:\n", + " return f\"Error: Unsupported LLM: {llm}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "813f0911-d53f-4887-9341-656712e32d8f", + "metadata": {}, + "outputs": [], + "source": [ + "def create_gradio_interface():\n", + " \"\"\"Create and configure the Gradio interface\"\"\"\n", + " commenter = CodeCommenter()\n", + " \n", + " # Define the main function for the interface\n", + " def process_code_interface(language, code, llm, generate_comments, generate_tests):\n", + " \"\"\"Process the code and return processed version based on selected options\"\"\"\n", + 
" if not code.strip():\n", + " return \"Please enter some code to process.\"\n", + " \n", + " if not generate_comments and not generate_tests:\n", + " return \"Please select at least one option: Generate comments or Generate test units.\"\n", + " \n", + " # Show processing message\n", + " options = []\n", + " if generate_comments:\n", + " options.append(\"comments\")\n", + " if generate_tests:\n", + " options.append(\"unit tests\")\n", + " \n", + " processing_msg = f\"Processing {language} code with {llm} to generate {' and '.join(options)}...\"\n", + " print(processing_msg)\n", + " \n", + " # Process the code\n", + " result = commenter.process_code(language, code, llm, generate_comments, generate_tests)\n", + " return result\n", + " \n", + " # Define default code\n", + " default_code = \"\"\"import pyodbc\n", + "from tabulate import tabulate\n", + "def connect_to_sql_server(server_name, database, username=None, password=None):\n", + " try:\n", + " if username and password:\n", + " connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};UID={username};PWD={password}\"\n", + " else:\n", + " connection_string = f\"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server_name};DATABASE={database};Trusted_Connection=yes\"\n", + " connection = pyodbc.connect(connection_string)\n", + " print(f\"Successfully connected to {server_name}/{database}\")\n", + " return connection\n", + " except Exception as e:\n", + " print(f\"Failed to connect to {server_name}/{database}: {str(e)}\")\n", + " return None\n", + "def get_record_count(connection, table_name):\n", + " try:\n", + " cursor = connection.cursor()\n", + " query = f\"SELECT COUNT(*) FROM {table_name}\"\n", + " cursor.execute(query)\n", + " count = cursor.fetchone()[0]\n", + " cursor.close()\n", + " print(f\"Record count for {table_name}: {count}\")\n", + " return count\n", + " except Exception as e:\n", + " print(f\"Failed to get record count for {table_name}: 
{str(e)}\")\n", + " return None\n", + "def select_top_records(connection, table_name, n):\n", + " try:\n", + " cursor = connection.cursor()\n", + " query = f\"SELECT TOP {n} * FROM {table_name}\"\n", + " cursor.execute(query)\n", + " records = cursor.fetchall()\n", + " columns = [column[0] for column in cursor.description]\n", + " cursor.close()\n", + " print(f\"Top {n} records from {table_name}\")\n", + " if records:\n", + " print(tabulate(records, headers=columns, tablefmt=\"grid\"))\n", + " return records\n", + " except Exception as e:\n", + " print(f\"Failed to retrieve top {n} records from {table_name}: {str(e)}\")\n", + " return None\n", + "conn = connect_to_sql_server(\"localhost\", \"AdventureWorks_lite\")\n", + "if conn:\n", + " total_records = get_record_count(conn, \"Sales.SalesOrderDetail\")\n", + " top_records = select_top_records(conn, \"Production.Product\", 10)\n", + " conn.close()\n", + " print(\"Connection closed successfully\")\"\"\"\n", + "\n", + " css = \"\"\"\n", + "textarea[rows]:not([rows=\"1\"]) {\n", + " overflow-y: auto !important;\n", + " scrollbar-width: thin !important;\n", + "}\n", + "textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar {\n", + " all: initial !important;\n", + " background: #f1f1f1 !important;\n", + "}\n", + "textarea[rows]:not([rows=\"1\"])::-webkit-scrollbar-thumb {\n", + " all: initial !important;\n", + " background: #a8a8a8 !important;\n", + "}\n", + "\"\"\"\n", + "\n", + " # Create the interface\n", + " with gr.Blocks(title=\"Code Commenter & Test Generator\", theme=gr.themes.Base(), css=css) as interface:\n", + " gr.Markdown(\"# 🔧 Code Commenter & Test Generator\")\n", + " gr.Markdown(\"Add detailed comments, docstrings, and/or generate unit tests for your code using various LLM models.\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column():\n", + " code_input = gr.Textbox(\n", + " label=\"Input Code\",\n", + " value=default_code,\n", + " lines=15,\n", + " max_lines=20,\n", + " info=\"Enter the code you want 
to process\"\n", + " )\n", + " \n", + " with gr.Column():\n", + " code_output = gr.Textbox(\n", + " label=\"Processed Code\",\n", + " lines=20,\n", + " max_lines=20,\n", + " info=\"Your code with added comments, docstrings, and/or unit tests\"\n", + " )\n", + " \n", + " # Add checkboxes below the textboxes\n", + " with gr.Row():\n", + " with gr.Column():\n", + " generate_comments_checkbox = gr.Checkbox(\n", + " label=\"Generate comments\",\n", + " value=True,\n", + " info=\"Add detailed comments and docstrings to the code\"\n", + " )\n", + " generate_tests_checkbox = gr.Checkbox(\n", + " label=\"Generate test units\",\n", + " value=False,\n", + " info=\"Generate comprehensive unit tests for the code\"\n", + " )\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " language_dropdown = gr.Dropdown(\n", + " choices=[\"Python\", \"Ruby\", \"Rust\", \"C++\", \"Java\"],\n", + " value=\"Python\",\n", + " label=\"Programming Language\",\n", + " info=\"Select the programming language of your code\"\n", + " )\n", + " \n", + " llm_dropdown = gr.Dropdown(\n", + " choices=[\n", + " \"gpt-4o-mini\",\n", + " \"claude-3-5-haiku-20241022\", \n", + " \"gemini-2.0-flash\",\n", + " \"ollama:llama3.2:latest\"\n", + " ],\n", + " value=\"gpt-4o-mini\",\n", + " label=\"LLM Model\",\n", + " info=\"Choose the language model to use\"\n", + " )\n", + " \n", + " generate_btn = gr.Button(\n", + " \"🚀 Process Code\", \n", + " variant=\"primary\",\n", + " size=\"lg\"\n", + " )\n", + " \n", + " # Add some API setup information\n", + " gr.Markdown(\"## 📝 API Setup Instructions\")\n", + " gr.Markdown(\"\"\"\n", + " To use this tool, you need to set up API keys as environment variables:\n", + " \n", + " - **OpenAI**: Set `OPENAI_API_KEY`\n", + " - **Anthropic**: Set `ANTHROPIC_API_KEY` \n", + " - **Google Gemini**: Set `GOOGLE_API_KEY`\n", + " - **Ollama**: Make sure Ollama is running locally on port 11434\n", + " \"\"\")\n", + " \n", + " gr.Markdown(\"## ✨ Features\")\n", + " 
gr.Markdown(\"\"\"\n", + " - **Generate Comments**: Add detailed docstrings and inline comments\n", + " - **Generate Unit Tests**: Create comprehensive test suites with mocking for external dependencies\n", + " - **Combined Mode**: Generate both comments and unit tests in one go\n", + " - **Multiple LLMs**: Choose from OpenAI, Anthropic, Google Gemini, or local Ollama models\n", + " - **Multiple Languages**: Support for Python, Ruby, Rust, C++, and Java\n", + " \"\"\")\n", + " \n", + " # Connect the button to the processing function\n", + " generate_btn.click(\n", + " fn=process_code_interface,\n", + " inputs=[language_dropdown, code_input, llm_dropdown, generate_comments_checkbox, generate_tests_checkbox],\n", + " outputs=code_output,\n", + " show_progress=True\n", + " )\n", + " \n", + " return interface" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ef461e08-c1d5-406d-b7d2-a4329f16486e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🚀 Starting Code Commenter & Test Generator...\n", + "📋 Setting up Gradio interface...\n", + "🌐 Launching interface...\n", + "💡 The interface will open in your default browser\n", + "🔧 Make sure to set up your API keys as environment variables\n", + "* Running on local URL: http://127.0.0.1:7860\n", + "\n", + "To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"🚀 Starting Code Commenter & Test Generator...\")\n", + "print(\"📋 Setting up Gradio interface...\")\n", + "\n", + "# Create and launch the interface\n", + "interface = create_gradio_interface()\n", + "\n", + "print(\"🌐 Launching interface...\")\n", + "print(\"💡 The interface will open in your default browser\")\n", + "print(\"🔧 Make sure to set up your API keys as environment variables\")\n", + "\n", + "# Launch with auto-opening in browser\n", + "interface.launch(\n", + " server_name=\"127.0.0.1\",\n", + " server_port=7860,\n", + " share=False,\n", + " inbrowser=True,\n", + " show_error=True\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week4/community-contributions/day5_java_code_commenter.ipynb b/week4/community-contributions/day5_java_code_commenter.ipynb new file mode 100644 index 0000000..49ef719 --- /dev/null +++ b/week4/community-contributions/day5_java_code_commenter.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "45ca91c2", + "metadata": {}, + "source": [ + "# AI tool to add comments to the provided Java code\n", + "\n", + "Here we build a Gradio App that uses the frontier models to add comments to a java code. For testing purposes I have used the *cheaper* versions of the models, not the ones the leaderboards indicate as the best ones." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f44901f5", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import google.generativeai as genai\n", + "import anthropic\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c47706b3", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35446b9a", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "genai.configure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e899efd", + "metadata": {}, + "outputs": [], + "source": [ + "OPENAI_MODEL = \"gpt-4o-mini\"\n", + "CLAUDE_MODEL = \"claude-3-haiku-20240307\"\n", + "GEMINI_MODEL = 'gemini-2.0-flash-lite'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47640f53", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"You are an assistant that adds comments to java code. \"\n", + "system_message += \"Do not make any changes to the code itself.\"\n", + "system_message += \"Use comments sparingly. Only add them in places where they help to undestand how the code works. Do not comment every single line of the code.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f41ccbf0", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(code):\n", + " user_prompt = \"Add helpful comments to this java code. 
\"\n", + " user_prompt += \"Do not change the code itself.\\n\\n\"\n", + " user_prompt += code\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c57c0000", + "metadata": {}, + "outputs": [], + "source": [ + "test_code = \"\"\"\n", + "package com.hma.kafkaproducertest.producer;\n", + "\n", + "import com.hma.kafkaproducertest.model.TestDTO;\n", + "import org.springframework.cloud.stream.function.StreamBridge;\n", + "import org.springframework.messaging.Message;\n", + "import org.springframework.messaging.support.MessageBuilder;\n", + "import org.springframework.stereotype.Component;\n", + "\n", + "import java.util.Arrays;\n", + "import java.util.Comparator;\n", + "import java.util.StringJoiner;\n", + "import java.util.stream.Collectors;\n", + "import java.util.stream.IntStream;\n", + "\n", + "@Component\n", + "public class TestProducer {\n", + "\n", + " public static final String EVENT_TYPE_HEADER = \"event-type\";\n", + " private static final String BINDING_NAME = \"testProducer-out-0\";\n", + "\n", + " private final StreamBridge streamBridge;\n", + "\n", + " public TestProducer(StreamBridge streamBridge) {\n", + " this.streamBridge = streamBridge;\n", + " }\n", + "\n", + " public void sendMessage(TestDTO payload, String eventType){\n", + " Message message = MessageBuilder\n", + " .withPayload(payload)\n", + " .setHeader(EVENT_TYPE_HEADER, eventType)\n", + " .build();\n", + "\n", + " streamBridge.send(BINDING_NAME, message);\n", + " }\n", + "\n", + " public void test(String t1, String t2) {\n", + " var s = t1.length() > t2.length() ? t2 : t1;\n", + " var l = t1.length() > t2.length() ? 
t1 : t2;\n", + " var res = true;\n", + " for (int i = 0; i < s.length(); i++) {\n", + " if (s.charAt(i) == l.charAt(i)) {\n", + " res = false;\n", + " break;\n", + " }\n", + " }\n", + " System.out.println(res);\n", + " }\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00c71128", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(code):\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(code)}\n", + " ]\n", + " stream = openai.chat.completions.create(\n", + " model=OPENAI_MODEL,\n", + " messages=messages,\n", + " stream=True\n", + " )\n", + " result = \"\"\n", + " for chunk in stream:\n", + " result += chunk.choices[0].delta.content or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca92f8a8", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(code):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": user_prompt_for(code)},\n", + " ],\n", + " )\n", + " response = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " response += text or \"\"\n", + " yield response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dffed4b", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(code):\n", + " gemini = genai.GenerativeModel(\n", + " model_name=GEMINI_MODEL,\n", + " system_instruction=system_message\n", + " )\n", + " stream = gemini.generate_content(user_prompt_for(code), stream=True)\n", + " result = \"\"\n", + " for chunk in stream:\n", + " result += chunk.text or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31f9c267", + "metadata": {}, + "outputs": [], + "source": [ + "def comment_code(code, 
model):\n", + " if model==\"GPT\":\n", + " result = stream_gpt(code)\n", + " elif model==\"Claude\":\n", + " result = stream_claude(code)\n", + " elif model==\"Gemini\":\n", + " result = stream_gemini(code)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " yield from result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c04c0a1b", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " with gr.Row():\n", + " original_code = gr.Textbox(label=\"Java code:\", lines=10, value=test_code)\n", + " commented_code = gr.Markdown(label=\"Commented code:\")\n", + " with gr.Row():\n", + " model = gr.Dropdown([\"GPT\", \"Claude\", \"Gemini\"], label=\"Select model\", value=\"GPT\")\n", + " comment = gr.Button(\"Comment code\")\n", + "\n", + " comment.click(comment_code, inputs=[original_code, model], outputs=[commented_code])\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84d33a5f", + "metadata": {}, + "outputs": [], + "source": [ + "ui.close()" + ] + }, + { + "cell_type": "markdown", + "id": "bbd50bf7", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In my personal opinion, at least when using these *cheaper* versions of the models, the result provided by Claude is the best. ChatGPT adds way too many comments even if the system message discourages that. Gemini provides a good result also, but maybe adds a tad too few comments -- although that certainly depends on your personal preferences." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llms", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week4/community-contributions/day5_java_unit_test_generator.ipynb b/week4/community-contributions/day5_java_unit_test_generator.ipynb new file mode 100644 index 0000000..39e30e3 --- /dev/null +++ b/week4/community-contributions/day5_java_unit_test_generator.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "45ca91c2", + "metadata": {}, + "source": [ + "# AI tool to generate unit tests for the provided Java code\n", + "\n", + "Here we build a Gradio App that uses the frontier models to generate unit tests for a java code. For testing purposes I have used the *cheaper* versions of the models, not the ones the leaderboards indicate as the best ones." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f44901f5", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import google.generativeai as genai\n", + "import anthropic\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c47706b3", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", + "google_api_key = os.getenv('GOOGLE_API_KEY')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35446b9a", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "genai.configure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e899efd", + "metadata": {}, + "outputs": [], + "source": [ + "OPENAI_MODEL = \"gpt-4o-mini\"\n", + "CLAUDE_MODEL = \"claude-3-haiku-20240307\"\n", + "GEMINI_MODEL = 'gemini-2.0-flash-lite'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47640f53", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"You are an assistant that generates unit test for java code. 
\"\n", + "system_message += \"Generate one JUnit5 test class with all the relevant test cases in it.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f41ccbf0", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(code):\n", + " user_prompt = \"Generate unit tests for this java code.\\n\\n\"\n", + " user_prompt += code\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c57c0000", + "metadata": {}, + "outputs": [], + "source": [ + "test_code = \"\"\"\n", + "package com.hma.kafkaproducertest.rest;\n", + "\n", + "import com.hma.kafkaproducertest.model.TestDTO;\n", + "import com.hma.kafkaproducertest.producer.TestProducer;\n", + "import org.springframework.web.bind.annotation.*;\n", + "\n", + "@RestController\n", + "@RequestMapping(\"/api\")\n", + "public class TestController {\n", + "\n", + " private final TestProducer producer;\n", + "\n", + " public TestController(TestProducer producer) {\n", + " this.producer = producer;\n", + " }\n", + "\n", + " @PostMapping(\"/event\")\n", + " public TestDTO triggerKafkaEvent(@RequestBody TestDTO payload) {\n", + " producer.sendMessage(payload, \"test\");\n", + " return payload;\n", + " }\n", + "\n", + "}\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00c71128", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(code):\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(code)}\n", + " ]\n", + " stream = openai.chat.completions.create(\n", + " model=OPENAI_MODEL,\n", + " messages=messages,\n", + " stream=True\n", + " )\n", + " result = \"\"\n", + " for chunk in stream:\n", + " result += chunk.choices[0].delta.content or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca92f8a8", + "metadata": {}, + "outputs": [], + "source": [ + "def 
stream_claude(code):\n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": user_prompt_for(code)},\n", + " ],\n", + " )\n", + " response = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " response += text or \"\"\n", + " yield response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dffed4b", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gemini(code):\n", + " gemini = genai.GenerativeModel(\n", + " model_name=GEMINI_MODEL,\n", + " system_instruction=system_message\n", + " )\n", + " stream = gemini.generate_content(user_prompt_for(code), stream=True)\n", + " result = \"\"\n", + " for chunk in stream:\n", + " result += chunk.text or \"\"\n", + " yield result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31f9c267", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_tests(code, model):\n", + " if model==\"GPT\":\n", + " result = stream_gpt(code)\n", + " elif model==\"Claude\":\n", + " result = stream_claude(code)\n", + " elif model==\"Gemini\":\n", + " result = stream_gemini(code)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " yield from result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c04c0a1b", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " with gr.Row():\n", + " original_code = gr.Textbox(label=\"Java code:\", lines=10, value=test_code)\n", + " generated_code = gr.Markdown(label=\"Unit tests:\")\n", + " with gr.Row():\n", + " model = gr.Dropdown([\"GPT\", \"Claude\", \"Gemini\"], label=\"Select model\", value=\"GPT\")\n", + " generate = gr.Button(\"Generate tests\")\n", + "\n", + " generate.click(generate_tests, inputs=[original_code, model], outputs=[generated_code])\n", + "\n", + "ui.launch(inbrowser=True)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "84d33a5f", + "metadata": {}, + "outputs": [], + "source": [ + "ui.close()" + ] + }, + { + "cell_type": "markdown", + "id": "bbd50bf7", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "The models are missing some information as the `TestDTO` is not defined in the code provided as an input.\n", + "\n", + "Results:\n", + "- Gemini: Generates a well constructed test class with multiple test cases covering scenarios with valid and invalid inputs. It makes assumptions about the content of `TestDTO` and adds a note about those as a comment.\n", + "- Claude: Similar approach to unknown format of `TestDTO`, although no comment added about the assumptions made. The test cases are structured differently, and they don't cover any case of invalid input, which in my opinion is an important test for a REST endpoint.\n", + "- GPT: While the other two generated *real* unit tests using the mockito extension, GPT generated a *webMVC* test. The other two relied on the equality implementation of `TestDTO`, while GPT checks separately each field in the response. As this type of test spins up the application context, the test won't run without additional configuration. In addition, some imports are missing from the test file.\n", + "\n", + "It comes down to personal preferences, but I would give the point to Gemini for this one." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llms", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week5/community-contributions/08_rag_qa_assistant.ipynb b/week5/community-contributions/08_rag_qa_assistant.ipynb new file mode 100644 index 0000000..2d0affb --- /dev/null +++ b/week5/community-contributions/08_rag_qa_assistant.ipynb @@ -0,0 +1,710 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "3f78498d-dbc6-4e1b-a629-9ac9e44c8dd8", + "metadata": {}, + "source": [ + "# RAG-powered Q&A agent for Insurellm employees\n", + "---\n", + "\n", + "An internal expert knowledge assistant for Insurellm employees, using Retrieval-Augmented Generation (RAG) to deliver fast, accurate, and cost-efficient answers to a wide range of internal queries.\n", + "\n", + "- 🌍 Task: Answer questions about Insurellm using naive RAG\n", + "- 🧠 Models: OpenAI GPT via LangChain\n", + "- 🔍 Retrieval: ChromaDB + OpenAI embeddings\n", + "- 🚀 Tools:\n", + " - langchain: 0.3.21\n", + " - openai: 1.69.0\n", + " - chromadb: 0.6.3\n", + " - gradio: 5.23.1\n", + " - python: 3.11.11\n", + "\n", + "- ✨ Features:\n", + "\n", + " - Loads PDF, text, and markdown files automatically\n", + " - Only updates when files actually change (saves time)\n", + " - Breaks documents into small, overlapping pieces for better search\n", + " - Finds the most relevant information using smart matching\n", + " - Remembers conversation history and shows where answers come from\n", + " - Only answers based on your documents (no made-up information)\n", + " - Web chat interface with streaming responses\n", + " - Handles errors gracefully and detects duplicate 
content\n", + " - Tracks document details and keeps everything organized\n", + " - Ready for business use with built-in quality checks\n", + "\n", + "- 📤 Output: Streaming response with sources retrieved from the knowledge base\n", + "- 🧑‍💻 Skill Level: Intermediate\n", + "- ⚙️ Hardware: ✅ CPU is sufficient — no GPU required\n", + "\n", + "🛠️ **Requirements**: 🔑 OpenAI API Key \n", + "\n", + "⚙️ **Customizable by user**\n", + "- 📝 Modify system and expansion prompts\n", + "- 📁 Drop in new company documents\n", + "- 🎯 Adjust retrieval top-k and similarity threshold\n", + "\n", + "This project currently uses a naive RAG approach, which limits the assistant's performance and accuracy. To improve response quality and reliability, more advanced RAG techniques will be needed — a more refined and powerful version is planned for future release.\n", + "\n", + "![](https://github.com/lisekarimi/lexo/blob/main/assets/08_naive_rag.png?raw=true)\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "b9abf112-72ca-431a-b7cf-b126e0a69a4d", + "metadata": {}, + "source": [ + "## 📥 Imports" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abdef4fe-5055-4259-99c7-82a0525c0d35", + "metadata": {}, + "outputs": [], + "source": [ + "# Standard library imports\n", + "import os\n", + "import hashlib\n", + "from pathlib import Path\n", + "from typing import List\n", + "\n", + "# Third-party imports\n", + "import numpy as np\n", + "import plotly.graph_objects as go\n", + "from dotenv import load_dotenv\n", + "from pydantic import Field\n", + "from sklearn.manifold import TSNE\n", + "import gradio as gr\n", + "\n", + "# LangChain core imports\n", + "from langchain.document_loaders import TextLoader, PyPDFLoader\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.schema import BaseRetriever, 
Document\n", + "from langchain.schema.vectorstore import VectorStoreRetriever\n", + "from langchain.callbacks.manager import CallbackManagerForRetrieverRun\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "from langchain.prompts import PromptTemplate\n", + "\n", + "# LangChain integrations\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_chroma import Chroma" + ] + }, + { + "cell_type": "markdown", + "id": "79875c2d-4193-4fa8-95b8-ad128b1c84fb", + "metadata": {}, + "source": [ + "## 🔐 Load env variables and configuration" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b5ca4dd-a1c2-4fc6-844f-7b0c83008c99", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables\n", + "load_dotenv(override=True)\n", + "\n", + "# Configuration\n", + "DATA_PATH = \"data/knowledge-base/\" # Use your path\n", + "MODEL = \"gpt-4o-mini\"\n", + "CHROMA_PATH = \"vector_db/chroma_insurellm\"\n", + "\n", + "# Explicitly access the OpenAI API key\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if not openai_api_key:\n", + " print(\"❌ OPENAI_API_KEY is missing\")" + ] + }, + { + "cell_type": "markdown", + "id": "18e5b9a1-dca8-4b42-8517-174f653f30a7", + "metadata": {}, + "source": [ + "## 📄 Load files as Document objects into memory" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae72f98b-05a5-4758-9503-424a93055323", + "metadata": {}, + "outputs": [], + "source": [ + "# Load .pdf, .txt, and .md documents with metadata, excluding Jupyter checkpoints.\n", + "\n", + "documents = []\n", + "\n", + "def add_metadata(doc, file_path):\n", + " doc.metadata[\"doc_type\"] = file_path.parent.name\n", + " doc.metadata[\"file_name\"] = file_path.name\n", + " if not doc.page_content.strip():\n", + " print(f\"⚠️ Empty content in {file_path}\")\n", + " # else:\n", + " # 
print(doc)\n", + " # print(\"-\" * 40)\n", + " return doc\n", + "\n", + "for file_path in Path(DATA_PATH).rglob(\"*\"):\n", + " if \".ipynb_checkpoints\" in file_path.parts:\n", + " continue\n", + "\n", + " try:\n", + " if file_path.name.endswith(\".pdf\"):\n", + " docs = PyPDFLoader(str(file_path)).load()\n", + " elif file_path.name.endswith((\".txt\", \".md\")):\n", + " docs = TextLoader(str(file_path), encoding=\"utf-8\").load()\n", + " else:\n", + " continue\n", + " except Exception as e:\n", + " print(f\"❌ Skipped {file_path}: {e}\")\n", + " continue\n", + "\n", + " documents.extend([add_metadata(doc, file_path) for doc in docs])\n", + "\n", + "print(f\"{len(documents)} documents loaded.\" if documents else \"No documents loaded.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "ed0fcc85-ca14-430a-bde2-db3d77f79143", + "metadata": {}, + "source": [ + "## ✂️ Splitting documents into chunks" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e40d0487-6db4-4c3f-b0c6-e9aaf4e14b37", + "metadata": {}, + "outputs": [], + "source": [ + "# Split documents into smaller chunks with overlapping characters for better context.\n", + "text_splitter = CharacterTextSplitter(\n", + " chunk_size=1000,\n", + " chunk_overlap=200,\n", + " add_start_index=True # Maintain chunk order (useful for context tracking)\n", + ")\n", + "\n", + "# Load and split documents\n", + "chunks = text_splitter.split_documents(documents)\n", + "\n", + "print(f\"Split {len(documents)} documents into {len(chunks)} chunks.\")\n", + "\n", + "def generate_chunk_id(text):\n", + " return hashlib.md5(text.encode(\"utf-8\")).hexdigest()\n", + "\n", + "# Add chunk_id to each chunk's metadata\n", + "for chunk in chunks:\n", + " chunk.metadata[\"chunk_id\"] = generate_chunk_id(chunk.page_content) # Create an MD5 hash of the chunk's content\n", + " if not chunk.page_content.strip():\n", + " print(f\"⚠️ Empty chunk from: {chunk.metadata['file_name']}\")\n", + "\n", + "# 
Debug: print a few chunk metadatas to verify chunk_id is added\n", + "for i, chunk in enumerate(chunks[:2]):\n", + " print(f\"Chunk {i+1} metadata:\", chunk.metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1faa604c-ddd9-4475-aaff-f4456629d77d", + "metadata": {}, + "outputs": [], + "source": [ + "# Check for duplicate chunk IDs\n", + "chunk_ids = [chunk.metadata[\"chunk_id\"] for chunk in chunks]\n", + "duplicate_ids = [chunk_id for chunk_id in chunk_ids if chunk_ids.count(chunk_id) > 1]\n", + "\n", + "if duplicate_ids:\n", + " print(f\"Duplicate chunk IDs found: {duplicate_ids}\")\n", + "else:\n", + " print(\"No duplicate chunks.\")" + ] + }, + { + "cell_type": "markdown", + "id": "d73f6bee-5df5-422a-a03f-e117a858370b", + "metadata": {}, + "source": [ + "## 🧠 Chunks Embedding" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0135e85-6e0b-45a4-ab0d-b9fec56b63ac", + "metadata": {}, + "outputs": [], + "source": [ + "embedding_function = OpenAIEmbeddings()\n", + "# By default, OpenAIEmbeddings() uses OpenAI's text-embedding-ada-002 model - a multilingual model" + ] + }, + { + "cell_type": "markdown", + "id": "dbdb70eb-9902-4065-92b7-c72c2b8e15f7", + "metadata": {}, + "source": [ + "## 💾 Save embedded chunks to Chroma database" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "093959f0-6826-4594-9338-598094e24923", + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(CHROMA_PATH, exist_ok=True)\n", + "\n", + "def get_existing_chunk_ids(db_path):\n", + " try:\n", + " db_existing = Chroma(persist_directory=db_path)\n", + " results = db_existing._collection.get(include=[\"metadatas\"])\n", + " return set(\n", + " m[\"chunk_id\"] for m in results[\"metadatas\"]\n", + " if isinstance(m, dict) and \"chunk_id\" in m\n", + " )\n", + " except Exception as e:\n", + " print(\"❌ Error loading existing chunk IDs:\", e)\n", + " return set()\n", + "\n", + "# 
Get chunk_ids of current chunks\n", + "new_chunk_ids = set([chunk.metadata[\"chunk_id\"] for chunk in chunks])\n", + "\n", + "# Get existing chunk_ids from Chroma\n", + "existing_chunk_ids = get_existing_chunk_ids(CHROMA_PATH)\n", + "\n", + "# Compare\n", + "if new_chunk_ids != existing_chunk_ids:\n", + " print(\"Chunk changes detected. Rebuilding Chroma DB.\")\n", + " db = Chroma.from_documents(documents=chunks, embedding=embedding_function, persist_directory=CHROMA_PATH)\n", + " print(f\"Saved {len(chunks)} chunks to {CHROMA_PATH}.\")\n", + "else:\n", + " db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)\n", + " print(\"Chroma DB is up to date. Skipping regeneration.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "670b049a-0eca-41c1-a5a8-8ed4561168b2", + "metadata": {}, + "source": [ + "## 📊 Visualizing the Vector Store" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1179ca76-2502-4bea-a4d8-4cbe149e92fa", + "metadata": {}, + "outputs": [], + "source": [ + "collection = db._collection\n", + "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n", + "vectors = np.array(result['embeddings'])\n", + "documents = result['documents']\n", + "metadatas = result['metadatas']\n", + "doc_types = [metadata['doc_type'] for metadata in metadatas]\n", + "colors = [['blue', '#4B0082', 'red', '#8B4513'][['products', 'employees', 'contracts', 'company'].index(t)] for t in doc_types]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c18bd18c-0b6c-4206-b2d0-a1bb59b3d39e", + "metadata": {}, + "outputs": [], + "source": [ + "# We humans find it easier to visualize things in 2D!\n", + "# Reduce the dimensionality of the vectors to 2D using t-SNE\n", + "# (t-distributed stochastic neighbor embedding)\n", + "\n", + "tsne = TSNE(n_components=2, random_state=42)\n", + "reduced_vectors = tsne.fit_transform(vectors)\n", + "\n", + "# Create the 2D scatter plot\n", + "fig 
= go.Figure(data=[go.Scatter(\n", + " x=reduced_vectors[:, 0],\n", + " y=reduced_vectors[:, 1],\n", + " mode='markers',\n", + " marker=dict(size=8, color=colors, opacity=0.8),\n", + " text=[f\"Type: {t}
Text: {d[:100]}...\" for t, d in zip(doc_types, documents)],\n", + " hoverinfo='text'\n", + ")])\n", + "\n", + "fig.update_layout(\n", + " title='2D Chroma Vector Store Visualization',\n", + " plot_bgcolor='black',\n", + " paper_bgcolor='black',\n", + " font=dict(color='black'),\n", + " xaxis=dict(gridcolor='lightgray', zerolinecolor='lightgray'),\n", + " yaxis=dict(gridcolor='lightgray', zerolinecolor='lightgray'),\n", + " width=800,\n", + " height=600,\n", + " margin=dict(r=20, b=10, l=10, t=40),\n", + ")\n", + "\n", + "\n", + "fig.show()" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "b7d527de", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71364356-7edb-4e72-a7ba-f6284d4a998d", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's try 3D!\n", + "\n", + "tsne = TSNE(n_components=3, random_state=42)\n", + "reduced_vectors = tsne.fit_transform(vectors)\n", + "\n", + "# Create the 3D scatter plot\n", + "fig = go.Figure(data=[go.Scatter3d(\n", + " x=reduced_vectors[:, 0],\n", + " y=reduced_vectors[:, 1],\n", + " z=reduced_vectors[:, 2],\n", + " mode='markers',\n", + " marker=dict(size=8, color=colors, opacity=0.8),\n", + " text=[f\"Type: {t}
Text: {d[:100]}...\" for t, d in zip(doc_types, documents)],\n", + " hoverinfo='text'\n", + ")])\n", + "\n", + "fig.update_layout(\n", + " title='3D Chroma Vector Store Visualization',\n", + " plot_bgcolor='black',\n", + " paper_bgcolor='black',\n", + " font=dict(color='white'),\n", + " scene=dict(\n", + " xaxis=dict(color='white', backgroundcolor='black', showbackground=True),\n", + " yaxis=dict(color='white', backgroundcolor='black', showbackground=True),\n", + " zaxis=dict(color='white', backgroundcolor='black', showbackground=True)\n", + " ),\n", + " width=900,\n", + " height=700,\n", + " margin=dict(r=20, b=10, l=10, t=40)\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "163a82aa", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "c19187ba-1ac9-400e-ae9b-684682349e8b", + "metadata": {}, + "source": [ + "## 🔍 Query Chroma" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f8d1524-c822-4303-b1a0-a3440cc90f82", + "metadata": {}, + "outputs": [], + "source": [ + "similarity_threshold = 0.5\n", + "\n", + "class MyVectorStoreRetriever(VectorStoreRetriever):\n", + " def _get_relevant_documents(\n", + " self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n", + " ) -> List[Document]:\n", + " docs_and_similarities = (\n", + " self.vectorstore.similarity_search_with_relevance_scores(\n", + " query, **self.search_kwargs\n", + " )\n", + " )\n", + "\n", + " # Make the score part of the document metadata\n", + " for doc, similarity in docs_and_similarities:\n", + " doc.metadata[\"score\"] = similarity\n", + "\n", + " docs = [doc for doc, sim in docs_and_similarities if sim >= self.search_kwargs.get(\"score_threshold\", 0)]\n", + " return docs\n", + "\n", + "retriever = MyVectorStoreRetriever(\n", + " vectorstore=db,\n", + " 
search_type=\"similarity_score_threshold\",\n", + " search_kwargs={\"score_threshold\": similarity_threshold, \"k\": 20},\n", + ")\n", + "\n", + "\n", + "# Add metadata to the context sent to the LLM\n", + "def inject_metadata(doc: Document) -> Document:\n", + " doc_type = doc.metadata.get(\"doc_type\", \"Unknown\")\n", + " file_name = doc.metadata.get(\"file_name\", \"Unknown\")\n", + " content = f\"[SOURCE: {doc_type} - {file_name}]\\n{doc.page_content}\"\n", + " return Document(page_content=content, metadata=doc.metadata)\n", + "\n", + "class MetadataInjectingRetriever(BaseRetriever):\n", + " base_retriever: BaseRetriever = Field()\n", + "\n", + " def _get_relevant_documents(self, query: str):\n", + " docs = self.base_retriever.get_relevant_documents(query)\n", + " return [inject_metadata(doc) for doc in docs]\n", + "\n", + "retriever = MetadataInjectingRetriever(base_retriever=retriever)" + ] + }, + { + "cell_type": "markdown", + "id": "7446b2e0-23ca-4ad5-935d-1944f29b53cf", + "metadata": {}, + "source": [ + "## 🗣️ LLM and answers" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab9093f-a6be-4ade-98f0-6911f47cb091", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n", + "memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4830b80-5d43-4d23-9ac6-410fc110b74b", + "metadata": {}, + "outputs": [], + "source": [ + "# Define your question\n", + "question = \"Who are the top 3 earners in 2023 with base, bonus, and total. Include names.\"\n", + "\n", + "# Define the system prompt\n", + "system_prompt = \"\"\"\n", + "You are an assistant that answers questions about the company Insurellm.\n", + "\n", + "Use the following chat history and retrieved documents to answer.\n", + "\n", + "Always base your answers strictly on the retrieved documents. 
If documents contain partial info, respond with what’s available. If there is no info, say so.\n", + "\n", + "Do not invent names, roles, or facts.\n", + "\n", + "You can use the document source information shown in the format [SOURCE: doc_type - file_name] if it helps you answer the question accurately.\n", + "\n", + "Always extract exact numbers (like number of employees, years, revenue, etc.) from the documents if they are mentioned.\n", + "\n", + "\n", + "Chat History:\n", + "{chat_history}\n", + "\n", + "Documents:\n", + "{context}\n", + "\n", + "Question:\n", + "{question}\n", + "\"\"\"\n", + "\n", + "# Create the prompt template\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"chat_history\", \"context\", \"question\"],\n", + " template=system_prompt\n", + ")\n", + "\n", + "# Set up LLM, memory, and conversation chain\n", + "llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n", + "memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key=\"answer\")\n", + "\n", + "conversation_chain = ConversationalRetrievalChain.from_llm(\n", + " llm=llm,\n", + " retriever=retriever,\n", + " memory=memory,\n", + " return_source_documents=True,\n", + " combine_docs_chain_kwargs={\"prompt\": prompt}\n", + ")\n", + "\n", + "# Format chat history\n", + "chat_history_text = \"\\n\".join([f\"{msg.type.upper()}: {msg.content}\" for msg in memory.chat_memory.messages])\n", + "\n", + "# Retrieve docs using the original question\n", + "retrieved_docs = retriever.get_relevant_documents(question)\n", + "# print(\"\\n📦 Context sent to LLM:\\n\")\n", + "# for i, doc in enumerate(retriever.get_relevant_documents(question), 1):\n", + "# print(f\"--- Document {i} ---\")\n", + "# print(doc.page_content) # preview\n", + "# print()\n", + "\n", + "# Invoke the chain\n", + "response = conversation_chain.invoke({\"question\": question})\n", + "\n", + "print(\"\\n🧠 Answer:\", response[\"answer\"])" + ] + }, + { + "cell_type": "markdown", + "id": 
"794f74c2-9b85-4d2c-8476-f4b29a001752", + "metadata": {}, + "source": [ + "## 🎛️ Gradio interface" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa85878e-04e4-457e-8775-523194c26409", + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Define your system prompt\n", + "\n", + "system_prompt = \"\"\"\n", + "You are an assistant that answers questions about the company Insurellm.\n", + "\n", + "Use the following chat history and retrieved documents to answer. Always base your answers strictly on the retrieved documents. If documents contain partial info, respond with what’s available. If there is no info, say so.\n", + "\n", + "You can use the document source information shown in the format [SOURCE: doc_type - file_name] if it helps answer the question accurately.\n", + "\n", + "Extract exact numbers (like number of employees, years, revenue, etc.) from the documents if mentioned. Do not invent names, roles, or facts.\n", + "\n", + "Behavior Guidelines:\n", + "- Respond only when the user asks a question or requests clarification.\n", + "- If the user greets you or expresses gratitude, respond warmly, but **avoid repeating the previous answer** unless explicitly requested for more details.\n", + "- If the user asks \"thank you\" or similar, acknowledge it with gratitude, but **do not provide the same answer again** unless further information is requested.\n", + "- If the user shares feedback, acknowledge it, thank them, and offer further assistance.\n", + "- If the user expresses frustration or confusion, empathize, clarify, and offer further support.\n", + "- If the user doesn't find a clear answer, encourage them to ask for clarification or provide additional details, and offer further assistance.\n", + "\n", + "Chat History:\n", + "{chat_history}\n", + "\n", + "Documents:\n", + "{context}\n", + "\n", + "Question:\n", + "{question}\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"e9c1276d-abd0-4766-88d0-a710c030014d", + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Create the prompt template\n", + "\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"chat_history\", \"context\", \"question\"],\n", + " template=system_prompt\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0fe0466-7e87-4a40-b398-29d7e821f48f", + "metadata": {}, + "outputs": [], + "source": [ + "# 3. Set up LLM, memory, retriever, and the updated chain\n", + "\n", + "llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n", + "memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key=\"answer\")\n", + "conversation_chain = ConversationalRetrievalChain.from_llm(\n", + " llm=llm,\n", + " retriever=retriever,\n", + " memory=memory,\n", + " return_source_documents=True,\n", + " combine_docs_chain_kwargs={\"prompt\": prompt}\n", + ")\n", + "\n", + "def chat(question, history):\n", + " result = conversation_chain.invoke({\"question\": question})\n", + " answer = \"\"\n", + " for chunk in result[\"answer\"]:\n", + " answer += chunk\n", + " yield answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2243e89c-c49b-416a-8152-f3679a9e2c05", + "metadata": {}, + "outputs": [], + "source": [ + "view = gr.ChatInterface(chat, type=\"messages\").launch(inbrowser=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/week5/community-contributions/Personal Knowledge Worker/Project_GPT.ipynb b/week5/community-contributions/Personal Knowledge Worker/Project_GPT.ipynb new file 
mode 100644 index 0000000..4bafbb0 --- /dev/null +++ b/week5/community-contributions/Personal Knowledge Worker/Project_GPT.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dfe37963-1af6-44fc-a841-8e462443f5e6", + "metadata": {}, + "source": [ + "## Personal Knowledge Worker for Sameer Khadatkar\n", + "\n", + "This project will use RAG (Retrieval Augmented Generation) to ensure our question/answering assistant has high accuracy.\n", + "\n", + "This first implementation will use a simple, brute-force type of RAG.." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba2779af-84ef-4227-9e9e-6eaf0df87e77", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import glob\n", + "from dotenv import load_dotenv\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "802137aa-8a74-45e0-a487-d1974927d7ca", + "metadata": {}, + "outputs": [], + "source": [ + "# imports for langchain, plotly and Chroma\n", + "\n", + "from langchain.document_loaders import DirectoryLoader, TextLoader\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.schema import Document\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_chroma import Chroma\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.manifold import TSNE\n", + "import numpy as np\n", + "import plotly.graph_objects as go\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "from langchain.embeddings import HuggingFaceEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58c85082-e417-4708-9efe-81a5d55d1424", + "metadata": {}, + "outputs": [], + "source": [ + "# price is a factor, so we're going to use a low cost model\n", + "\n", + "MODEL = \"gpt-4o-mini\"\n", + "db_name = \"vector_db\"" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "ee78efcb-60fe-449e-a944-40bab26261af", + "metadata": {}, + "outputs": [], + "source": [ + "# Load environment variables in a file called .env\n", + "\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "730711a9-6ffe-4eee-8f48-d6cfb7314905", + "metadata": {}, + "outputs": [], + "source": [ + "# Read in documents using LangChain's loaders\n", + "# Take everything in all the sub-folders of our knowledgebase\n", + "\n", + "folders = glob.glob(\"sameer-db/*\")\n", + "\n", + "def add_metadata(doc, doc_type):\n", + " doc.metadata[\"doc_type\"] = doc_type\n", + " return doc\n", + "\n", + "text_loader_kwargs = {'encoding': 'utf-8'}\n", + "\n", + "documents = []\n", + "for folder in folders:\n", + " doc_type = os.path.basename(folder)\n", + " loader = DirectoryLoader(folder, glob=\"**/*.md\", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)\n", + " folder_docs = loader.load()\n", + " documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])\n", + "\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "chunks = text_splitter.split_documents(documents)\n", + "\n", + "print(f\"Total number of chunks: {len(chunks)}\")\n", + "print(f\"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78998399-ac17-4e28-b15f-0b5f51e6ee23", + "metadata": {}, + "outputs": [], + "source": [ + "# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk\n", + "# Chroma is a popular open source Vector Database based on SQLLite\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "if os.path.exists(db_name):\n", + " Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()\n", + "\n", + "# 
Create vectorstore\n", + "vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)\n", + "print(f\"Vectorstore created with {vectorstore._collection.count()} documents\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff2e7687-60d4-4920-a1d7-a34b9f70a250", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's investigate the vectors\n", + "\n", + "collection = vectorstore._collection\n", + "count = collection.count()\n", + "\n", + "sample_embedding = collection.get(limit=1, include=[\"embeddings\"])[\"embeddings\"][0]\n", + "dimensions = len(sample_embedding)\n", + "print(f\"There are {count:,} vectors with {dimensions:,} dimensions in the vector store\")" + ] + }, + { + "cell_type": "markdown", + "id": "b0d45462-a818-441c-b010-b85b32bcf618", + "metadata": {}, + "source": [ + "## Visualizing the Vector Store\n", + "\n", + "Let's take a minute to look at the documents and their embedding vectors to see what's going on." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b98adf5e-d464-4bd2-9bdf-bc5b6770263b", + "metadata": {}, + "outputs": [], + "source": [ + "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n", + "vectors = np.array(result['embeddings'])\n", + "documents = result['documents']\n", + "metadatas = result['metadatas']\n", + "doc_types = [metadata['doc_type'] for metadata in metadatas]\n", + "colors = [['green', 'red'][['personal', 'profile'].index(t)] for t in doc_types]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "427149d5-e5d8-4abd-bb6f-7ef0333cca21", + "metadata": {}, + "outputs": [], + "source": [ + "# We humans find it easier to visualize things in 2D!\n", + "# Reduce the dimensionality of the vectors to 2D using t-SNE\n", + "# (t-distributed stochastic neighbor embedding)\n", + "\n", + "tsne = TSNE(n_components=2, random_state=42,perplexity=5)\n", + "reduced_vectors = tsne.fit_transform(vectors)\n", + "\n", + "# Create the 2D scatter plot\n", + "fig = go.Figure(data=[go.Scatter(\n", + " x=reduced_vectors[:, 0],\n", + " y=reduced_vectors[:, 1],\n", + " mode='markers',\n", + " marker=dict(size=5, color=colors, opacity=0.8),\n", + " text=[f\"Type: {t}
Text: {d[:100]}...\" for t, d in zip(doc_types, documents)],\n", + " hoverinfo='text'\n", + ")])\n", + "\n", + "fig.update_layout(\n", + " title='2D Chroma Vector Store Visualization',\n", + " scene=dict(xaxis_title='x',yaxis_title='y'),\n", + " width=800,\n", + " height=600,\n", + " margin=dict(r=20, b=10, l=10, t=40)\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1418e88-acd5-460a-bf2b-4e6efc88e3dd", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's try 3D!\n", + "\n", + "tsne = TSNE(n_components=3, random_state=42,perplexity=5)\n", + "reduced_vectors = tsne.fit_transform(vectors)\n", + "\n", + "# Create the 3D scatter plot\n", + "fig = go.Figure(data=[go.Scatter3d(\n", + " x=reduced_vectors[:, 0],\n", + " y=reduced_vectors[:, 1],\n", + " z=reduced_vectors[:, 2],\n", + " mode='markers',\n", + " marker=dict(size=5, color=colors, opacity=0.8),\n", + " text=[f\"Type: {t}
from langchain.schema import SystemMessage

# Instructions placed at the head of the chat history for every conversation
ASSISTANT_INSTRUCTIONS = """You are an AI Assistant specialized in providing accurate information about Sameer Khadatkar. Only respond when the question explicitly asks for information. 
    Keep your answers brief, factual, and based solely on the information provided. Do not speculate or fabricate details. 
    For example, if the user simply says "hi," respond with: "How can I help you?"
    """


def _new_memory():
    """Return an empty conversation memory that starts with the system instructions."""
    fresh = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    fresh.chat_memory.messages.insert(0, SystemMessage(content=ASSISTANT_INSTRUCTIONS))
    return fresh


# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = _new_memory()

# the retriever is an abstraction over the VectorStore that will be used during RAG;
# the number of chunks to fetch has to be passed through search_kwargs
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# putting it together: set up the conversation chain with the LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

# Let's try a simple question

query = "Who are you?"
result = conversation_chain.invoke({"question": query})
print(result["answer"])

# set up a new conversation memory for the chat so the test question above is not
# carried into the UI session; the system instructions are seeded again
memory = _new_memory()

# rebuild the chain around the fresh memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)


# Wrapping that in a function

def chat(question, history):
    """Answer one chat turn for Gradio.

    ``history`` is supplied by ``gr.ChatInterface`` but is not used here: the
    chain was built with its own ``memory`` object.
    """
    result = conversation_chain.invoke({"question": question})
    return result["answer"]
"execution_count": null, + "id": "b252d8c1-61a8-406d-b57a-8f708a62b014", + "metadata": {}, + "outputs": [], + "source": [ + "# And in Gradio:\n", + "\n", + "view = gr.ChatInterface(chat, type=\"messages\").launch(inbrowser=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e23270cf-2d46-4f9e-aeb3-de1673900d2f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3476931e-7d94-4b4d-8cc6-67a1bd5fa79c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week5/community-contributions/Personal Knowledge Worker/Project_PHI.ipynb b/week5/community-contributions/Personal Knowledge Worker/Project_PHI.ipynb new file mode 100644 index 0000000..b1ad1b8 --- /dev/null +++ b/week5/community-contributions/Personal Knowledge Worker/Project_PHI.ipynb @@ -0,0 +1,927 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fOxyiqtzKqLg", + "outputId": "714d12c5-775e-42c8-b51c-979a9112b808" + }, + "outputs": [], + "source": [ + "!pip install -q datasets requests torch peft bitsandbytes transformers trl accelerate sentencepiece tiktoken matplotlib gradio modal ollama langchain langchain-core langchain-text-splitters langchain-openai langchain-chroma langchain-community faiss-cpu feedparser" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zyxwwUw6LWXK" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + 
def add_metadata(doc, doc_type):
    """Record ``doc_type`` in the document's metadata and return the same document."""
    doc.metadata.update({"doc_type": doc_type})
    return doc
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

# Load every markdown file below each folder and tag it with the folder's name
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
    )
    documents += [add_metadata(doc, doc_type) for doc in loader.load()]

# Split the loaded documents into overlapping chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

found_types = {doc.metadata['doc_type'] for doc in documents}
print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {found_types}")

# Log in to the Hugging Face Hub with the token stored in the Colab secrets
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# Model id and the directory the vector store is persisted to
Phi_4 = "microsoft/Phi-4-mini-instruct"
db_name = "/content/drive/MyDrive/phi_vector_db"

# 4-bit NF4 quantization with double quantization and bfloat16 compute
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
class PHI4Embeddings(Embeddings):
    """Embedding adapter that mean-pools the last hidden layer of a language model.

    Each text is tokenized (truncated to ``max_length`` tokens) and run through
    the model without gradients. The final-layer hidden states are averaged over
    the attended tokens and L2-normalised, giving one unit-length vector per text.
    """

    def __init__(self, tokenizer, model, max_length: int = 512):
        """Store the tokenizer and model and switch the model to eval mode.

        Args:
            tokenizer: Tokenizer called as ``tokenizer(text, return_tensors="pt", ...)``.
            model: Model that accepts ``output_hidden_states=True`` and exposes ``.device``.
            max_length: Token limit applied when truncating each text.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.max_length = max_length
        self.model.eval()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one L2-normalised embedding (list of floats) per input text."""
        embeddings = []
        with torch.no_grad():
            for text in texts:
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    max_length=self.max_length,
                ).to(self.model.device)
                outputs = self.model(**inputs, output_hidden_states=True)
                # Upcast so the pooling is done in float32 rather than in
                # whatever reduced precision the model computes in
                hidden_states = outputs.hidden_states[-1].float()  # Last layer
                attention_mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
                # Guard the mean against an all-zero mask, which would give NaN
                token_count = attention_mask.sum(dim=1).clamp(min=1.0)
                pooled = (hidden_states * attention_mask).sum(dim=1) / token_count
                normalized = F.normalize(pooled, p=2, dim=1)
                embeddings.append(normalized[0].cpu().tolist())
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string the same way documents are embedded."""
        return self.embed_documents([text])[0]
# Prework
# Pull every stored vector, its source text and its metadata out of the Chroma collection
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
metadatas = result['metadatas']
doc_types = [metadata['doc_type'] for metadata in metadatas]

# One marker colour per document type. A type that is missing from this map is
# drawn in gray instead of aborting the whole cell with a ValueError.
color_by_type = {'personal': 'blue', 'profile': 'red'}
colors = [color_by_type.get(t, 'gray') for t in doc_types]

# We humans find it easier to visualize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

tsne = TSNE(n_components=2, random_state=42, perplexity=4)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot; hovering a point shows its type and the first 100 characters
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    # `scene` only configures 3D plots, so a 2D scatter needs its axis titles set here
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Wrap the already-loaded quantized model in a text-generation pipeline for LangChain
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_new_tokens=4069,  # NOTE(review): possibly a transposition of 4096 - confirm
    return_full_text=False,  # return only the newly generated text, not the prompt
    temperature=0.7  # NOTE(review): only applies when sampling is enabled - confirm do_sample
)

llm = HuggingFacePipeline(pipeline=pipe)

# set up the conversation memory for the chat
from langchain.schema import SystemMessage
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# memory.chat_memory.add_message(SystemMessage(content='''You are a helpful assistant that answers questions about Sameer Khadatkar **in English only**, based only on the retrieved documents.
# Do not respond in any other language.'''))

# the retriever is an abstraction over the VectorStore that will be used during RAG;
# the number of chunks to fetch has to be passed through search_kwargs
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# putting it together: set up the conversation chain with the local pipeline LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
def extract_first_helpful_answer(output: str) -> str:
    """Return the first line of the first non-empty answer segment in ``output``.

    The text is split on the "Helpful Answer:" marker. The first segment that
    still has content after stripping is used, and only its first line is
    returned. So text ahead of the first marker wins when there is any, and
    otherwise the text following the marker is used instead of an empty string.
    Output without the marker is returned stripped but otherwise unchanged.
    """
    marker = "Helpful Answer:"
    if marker not in output:
        return output.strip()
    for segment in output.split(marker):
        segment = segment.strip()
        if segment:
            return segment.split("\n")[0].strip()
    # The output held nothing but markers and whitespace
    return ""


# Wrapping that in a function

def chat(question, history):
    """Answer one chat turn for Gradio, trimmed to the first helpful answer.

    ``history`` is supplied by ``gr.ChatInterface`` but is not used here: the
    chain was built with its own ``memory`` object.
    """
    result = conversation_chain.invoke({"question": question})
    return extract_first_helpful_answer(result["answer"])
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2a0377fc1e0c4c08944be1857c4e2409": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7c8335e0c3f8459d89f3b9815a896e39", + "IPY_MODEL_0fcb91f0551a4871b747f82e5fa6ff38", + "IPY_MODEL_fa5c6cf8395840e08e2743d6e88190be" + ], + "layout": "IPY_MODEL_8613224ada934e7ba57fd5184ea61044" + } + }, + "4395c417cc854fc48da18d0ddd62671e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5c4a8d25dbc942d5a596c8fa8580a785": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7c8335e0c3f8459d89f3b9815a896e39": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1180c8fe49e94873a024d38d33649852", + "placeholder": "​", + "style": "IPY_MODEL_4395c417cc854fc48da18d0ddd62671e", + "value": "Loading checkpoint shards: 100%" + } + }, + "8613224ada934e7ba57fd5184ea61044": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9bcee7f185434cd0b1a998448236548c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c1b076c063e04536831d68e5e48f1692": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d678106a6601478cb5712991604788f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa5c6cf8395840e08e2743d6e88190be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c1b076c063e04536831d68e5e48f1692", + "placeholder": "​", + "style": "IPY_MODEL_9bcee7f185434cd0b1a998448236548c", + "value": " 2/2 [00:41<00:00, 19.69s/it]" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week5/community-contributions/Personal Knowledge Worker/sameer-db/personal/sameer.md b/week5/community-contributions/Personal Knowledge Worker/sameer-db/personal/sameer.md new file mode 100644 index 0000000..c585424 --- /dev/null +++ b/week5/community-contributions/Personal Knowledge Worker/sameer-db/personal/sameer.md @@ -0,0 +1,23 @@ +# Sameer Khadatkar + +Hi, I am **Sameer Khadatkar**, born and brought up in **Nagpur**. + +I completed my schooling from **Dinanath Junior College and High School, Nagpur** up to 12th standard. After that, I moved to **Amravati** for my Bachelor's degree. + +### Academic Journey +I prepared for the **GATE Mechanical Engineering (ME)** exam: +- **2020**: Rank **377** + +With this rank, I secured admission to the prestigious **Indian Institute of Science (IISc), Bangalore**. + +### Career +I later got placed at **Wells Fargo**, Hyderabad. + +### Personal Life +- I got married to my batchmate from Government College of Engineering Amravati. + +### Hobbies & Interests +I played **Cycle Polo** up to my 8th standard and even competed at the **national level**. + +### Family +- Parents, elder sister and wife. diff --git a/week5/community-contributions/Personal Knowledge Worker/sameer-db/profile/Profile.md b/week5/community-contributions/Personal Knowledge Worker/sameer-db/profile/Profile.md new file mode 100644 index 0000000..d9853cd --- /dev/null +++ b/week5/community-contributions/Personal Knowledge Worker/sameer-db/profile/Profile.md @@ -0,0 +1,145 @@ +# Sameer Raju Khadatkar + +**Quant AI/ML @ Wells Fargo | M.Tech. (CDS) @ IISc, Bangalore | B.Tech. 
(Mechanical) @ GCOE, Amravati** +📍 Hyderabad, Telangana, India +📧 sameer123khadatkar@gmail.com +🔗 [LinkedIn](https://www.linkedin.com/in/sameer-khadatkar/) + +--- + +## Summary + +I currently serve as a Quantitative Analytics Specialist within Wells Fargo's Model Risk Management (MRM) team in India and the Philippines. My primary responsibility involves validating AI/ML models, with a focus on fraud detection, as well as models used in marketing, credit scoring, and natural language processing (NLP). In this role, I ensure the conceptual soundness of models, conduct performance testing and explainability analysis, and rigorously challenge models by developing challenger models to detect weaknesses. + +Additionally, I ensure compliance with regulatory standards set by Wells Fargo, in alignment with guidelines from the Federal Reserve and the OCC. I work closely with model development and risk management teams, providing validation feedback and recommending improvements. I also contribute to documentation and reporting, preparing validation reports, and ensuring the ongoing monitoring of model performance. + +With a strong foundation in Machine Learning, Deep Learning, and High-Performance Computing gained during my graduate studies at the Indian Institute of Science, Bangalore, and a Bachelor's degree in Mechanical Engineering, I bring a unique blend of skills at the intersection of advanced technology and engineering. My expertise allows me to tackle complex challenges, drive innovation, and contribute to cutting-edge solutions in diverse industries. + +--- + +## Professional Experience + +### Wells Fargo International Solutions Private Ltd +**Quantitative Analytics Specialist – AVP** +📍 Hyderabad, Telangana, India +📅 August 2022 – September 2023 + +- Collaborating with a team overseeing an inventory of ∼300 models focused on Fraud Detection, primarily utilizing Logistic Regression, Extreme Gradient Boosting (XGBoost), and Neural Network models. 
+- Conduct validation of AI/ML models by ensuring conceptual soundness, performing performance testing, carrying out explainability analysis, and developing surrogate, challenger, and offset models to uncover potential weaknesses. +- Joined the team during its expansion in India, playing a key role in building trust with US stakeholders. Recognized with the **Manager’s Spotlight Award** for outstanding dedication and contributions. +- Developing a module to assist Validators in benchmarking anomaly detection models (Isolation Forest, Extended Isolation Forest, Autoencoders, Histogram-Based Outlier Score (HBOS), etc.) and assessing them using clustering performance metrics. +- Created a validation playbook for fraud detection vendor models and developed an Excel-based policy library to facilitate quick reference for team members. + +--- + +## Highlighted Projects at Wells Fargo + +### ✅ Check Authorization Model | Validation + +- Validated a high-impact machine learning model for check authorization, ensuring compliance with regulatory and bank's MRM standards. +- Reviewed model objectives, assumptions, architecture, and data pipeline. +- Assessed performance using AUC, recall, KS statistic, and PSI across time. +- Performed explainability analysis using multicollinearity checks, surrogate models (overall and segment level), SHAP, PDP, H-Statistic, 2D-PDPs, and sensitivity analysis. +- Identified local weaknesses through segmentation and built offset models to detect missed signals. +- Developed challenger models using YOLOv5, SigNet, TrOCR (Transformer-based OCR), XGBoost model, and pixel-based feature engineering. + +### 🧠 Word Embedding Explainability Research + +- Collaborated with the Bank’s Chief Model Risk Officer on a research project focused on the explainability of word embeddings using clustering techniques such as Spectral Clustering, HDBSCAN, and analysis of ReLU neural network activation patterns. 
+- Utilized Sentence Transformer embeddings (SBERT) and applied dimensionality reduction methods including PCA, UMAP, and t-SNE for cluster interpretation and visualization. +- Extended the research by developing a Mixture of Experts model leveraging XGBoost. + +--- + +## Education + +**Indian Institute of Science (IISc), Bangalore** +📅 2020 – 2022 +🎓 Master of Technology (M.Tech.), Computational and Data Sciences +📍 Bengaluru, Karnataka +**CGPA:** 9.1 / 10.0 + +**Government College of Engineering, Amravati (GCoEA)** +📅 2015 – 2019 +🎓 Bachelor of Technology (B.Tech.), Mechanical Engineering +📍 Amravati, Maharashtra +**CGPA:** 8.29 / 10.0 + +--- + +## Certifications + +- Advanced Data Science with IBM (Coursera) +- HYPERMESH (SHELL MESH AND SOLID MESH) +- Introduction to Big Data (Coursera) +- MASTERCAM (Design, Turning and Milling) +- CREO PARAMETRIC + +--- + +## Research Publication + +**Subspace Recursive Fermi-Operator Expansion Strategies for Large-Scale DFT Eigenvalue Problems on HPC Architectures** +📝 Sameer Khadatkar, Phani Motamarri (MATRIX Lab) +📅 July 20, 2023 +📚 *Journal of Chemical Physics, 159, 031102 (2023)* +🔗 [Publication Link](https://pubs.aip.org/aip/jcp/article/159/3/031102/2903241/Subspace-recursive-Fermi-operator-expansion) + +- Implemented recursive Fermi-operator expansion methods on multi-node CPU (PARAM Pravega) and GPU (ORNL Summit) systems for large-scale DFT problems. +- Applied mixed-precision strategies achieving 2× to 4× speedup over diagonalization. +- Benchmarked using MPI and SLATE for distributed dense linear algebra. + +--- + +## Academic, Independent and Other Projects + +- **LLM-Powered Multimodal Airline Chatbot**: Built a chatbot with GPT-4o-mini, supporting both text and voice, generating pop-art city images. Stack: Python, Gradio, custom tools. +- **Future Stock Price Prediction for MAANG**: Used yfinance, Stateful LSTM vs XGBoost. LSTM outperformed with ~0.02 MAE. 
+- **Duplicate Question Detection**: LSTM Siamese Network with Word2Vec and GloVe. GloVe performed better. +- **Music Genre Classification**: Used MFCCs and spectral features. Best result: 76% ± 3% accuracy with SVM. +- **Algorithm Implementation from Scratch**: PCA, LDA, GMM, TF-IDF, and backpropagation for DNNs. + +--- + +## Skills + +**Knowledge Areas:** +Model Risk Management, Machine Learning, Deep Learning, High-Performance Computing + +**Programming Languages:** +Python, C, C++ (OpenMP, MPI, CUDA), SQL + +**Python Libraries & Tools:** +Numpy, Pandas, Scikit-Learn, PyTorch, TensorFlow (Keras), PySpark, Matplotlib + +--- + +## Relevant Courses + +- Machine Learning for Signal Processing (IISc) +- Advanced Data Science with IBM (Coursera) +- Deep Learning (NPTEL) +- Pattern Recognition and Neural Networks (NPTEL) +- Numerical Linear Algebra (IISc) +- Data Analysis and Visualization (IISc) +- Numerical Solution of Differential Equations (IISc) +- Parallel Programming (IISc) +- Introduction to Big Data (Coursera) +- LLM Engineering: Master AI, Large Language Models & Agents (Udemy) + +--- + +## Extracurricular Activities + +- **Project Associate** at MATRIX Lab, CDS Department, IISc. +- **Teaching Assistant** for “DS284: Numerical Linear Algebra” at IISc. +- Led suspension operations for SAE BAJA Team at GCoE Amravati. +- Organized Annual Social Gathering as Joint Secretary at GCoE Amravati. 
+ +--- + +## Top Skills + +- Data Reporting +- SQL +- Microsoft Excel diff --git a/week6/community-contributions/lisekarimi/09_part1_data_curation.ipynb b/week6/community-contributions/lisekarimi/09_part1_data_curation.ipynb new file mode 100644 index 0000000..cee891a --- /dev/null +++ b/week6/community-contributions/lisekarimi/09_part1_data_curation.ipynb @@ -0,0 +1,716 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "40978455-23da-4159-bf08-15d9e8f79984", + "metadata": {}, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 1)\n", + "A complete pipeline from raw text to fine-tuned frontier and open source models\n", + "\n", + "---\n", + "In this project, we aim to **predict item prices based solely on their textual descriptions**. \n", + "\n", + "We approach the problem with a structured 8-part pipeline:\n", + "\n", + "- 🧩 **Part 1: Data Curation & Preprocessing** : We aggregate, clean, analyze, and balance the dataset — then export it in .pkl format and save it in the HuggingFace Hub for the next step: model training and evaluation.\n", + "\n", + "- ⚔️ **Part 2: Traditional ML vs Frontier LLMs** : We compare traditional machine learning models (LR, SVR, XGBoost) using vectorized text inputs (BoW, Word2Vec) against LLMs like GPT-4o, LLaMA, Deepseek ... ❗ Who will predict better: handcrafted features or massive pretraining?\n", + "\n", + "- 🧠 **Part 3: E5 Embeddings & RAG** : We compare XGBoost on **contextual dense embeddings** vs. Word2Vec, and test if **RAG** boosts GPT-4o Mini’s price predictions. 
📦 Do contextual embeddings and retrieval improve price prediction?\n", + "\n", + "- 🔧 **Part 4: Fine-Tuning GPT-4o Mini** : We fine-tune GPT-4o Mini on our curated dataset and compare performance before and after.\n", + "🤖 Can a fine-tuned GPT-4o Mini beat its own zero-shot performance?\n", + "\n", + "- 🦙 **Part 5: Evaluating LLaMA 3.1 8B Quantized** : We run LLaMA 3.1 (8B, quantized) using the same evaluation setup to see how well an open-source base model performs with no fine-tuning.\n", + "\n", + "- ⚙️ **Part 6: Fine-Tuning LLaMA 3.1 with QLoRA** : We fine-tune LLaMA 3.1 using QLoRA and explore key hyperparameters, tracking **training and validation loss** to monitor overfitting and select the best configuration.\n", + "\n", + "- 🧪 **Part 7: Evaluating Fine-Tuned LLaMA 3.1 8B (Quantized)** : After fine-tuning LLaMA 3.1, it's time to evaluate its performance and see how it stacks up against other models. Let's dive into the results.\n", + "\n", + "- 🏆**Part 8: Summary & Leaderboard** : Who comes out on top? Let’s find out. 
We wrap up with final model rankings and key insights across ML, embeddings, RAG, and fine-tuned frontier and open-source models.\n", + "\n", + "---\n", + "- ➡️ Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA \n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "Let’s begin with Part 1.\n", + "\n", + "# 🧩 Part 1: Data Curation & Preprocessing\n", + "\n", + "- Tasks:\n", + " - Load and filter dataset, then prepare each datapoint\n", + " - Explore, visualize, balance price distribution\n", + " - Export .pkl, upload to HF Hub\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ✅ CPU is sufficient — no GPU required\n", + "- 🛠️ Requirements: 🔑 Hugging Face Token\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcf2f470", + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddbb5eb0-9ab7-4675-b195-0bf4055b9320", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import sys\n", + "import random\n", + "import pickle\n", + "import importlib\n", + "from dotenv import load_dotenv\n", + "from huggingface_hub import login\n", + "from datasets import Dataset, DatasetDict\n", + "from collections import Counter, defaultdict\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa916b7a-9044-4461-b29a-815d47973e75", + "metadata": {}, + "outputs": [], + "source": [ + "# import datasets\n", + "# print(datasets.__version__)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "e6cf6e19-1276-4b37-8f9b-6acf1473a7c6", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "hf_token = os.getenv('HF_TOKEN')\n", + "if not hf_token:\n", + " print(\"❌ HF_TOKEN is missing\")\n", + "\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "id": "a1637a14-b2df-4286-a8d6-ddae413f4a8a", + "metadata": {}, + "source": [ + "## ⚙️ Data Loading & Curation (Simultaneously)\n", + "We load and curate the data at the same time using loaders.py and items.py.\n", + "- Datasets come from: https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/tree/main/raw/meta_categories\n", + "- `loaders.py` handles parallel loading and filtering of products\n", + "- `items.py` defines the Item class to clean, validate, and prepare each datapoint (title, description, price...) for modeling.\n", + "\n", + "\n", + "🛠️ Note: Data is filtered to include items priced between 1 and 999 USD.\n", + "\n", + "💡 Comments have been added in both files to clarify the processing logic.\n", + "\n", + "⚠️ Loading 2.8M+ items can take 40+ mins on a regular laptop.\n", + "\n", + "⚠️ Set WORKER wisely in `loaders.py` to match your system capacity. Too many may crash your machine." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b89273c-e02f-4c15-8394-5d948a266bfc", + "metadata": {}, + "outputs": [], + "source": [ + "sys.path.append('./helpers')\n", + "import helpers.items\n", + "import helpers.loaders\n", + "\n", + "importlib.reload(helpers.items)\n", + "importlib.reload(helpers.loaders)\n", + "\n", + "from helpers.items import Item # noqa: E402\n", + "from helpers.loaders import ItemLoader # noqa: E402" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "260a123b-8f34-4c66-bcac-1c3b25e95d7f", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_names = [\n", + " \"Automotive\",\n", + " \"Electronics\",\n", + " \"Office_Products\",\n", + " \"Tools_and_Home_Improvement\",\n", + " \"Cell_Phones_and_Accessories\",\n", + " \"Toys_and_Games\",\n", + " \"Appliances\",\n", + " \"Musical_Instruments\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b482032-cba9-4ee9-9451-9b7dc9f41be6", + "metadata": {}, + "outputs": [], + "source": [ + "items = []\n", + "for dataset_name in dataset_names:\n", + " loader = ItemLoader(dataset_name)\n", + " items.extend(loader.load())\n", + "\n", + "# Now, time for a coffee break!!\n", + "# By the way, the larger datasets first... it speeds up the process." 
+ ] + }, + { + "cell_type": "markdown", + "id": "145d0648-e01d-46b9-ad42-f10b69fccbc3", + "metadata": {}, + "source": [ + "## 🔍 Inspecting a Sample Datapoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0185985d-5f67-4e4b-ac66-95b5b293231f", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"A grand total of {len(items):,} items\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b0c0ae8-c0ec-4f6f-b847-800da379c01b", + "metadata": {}, + "outputs": [], + "source": [ + "# Investigate the first item from the list\n", + "\n", + "datapoint = items[0]\n", + "\n", + "# Access various attributes\n", + "title = datapoint.title\n", + "details = datapoint.details\n", + "price = datapoint.price\n", + "category = datapoint.category\n", + "\n", + "print(f\"Datapoint: {datapoint}\")\n", + "print('*' * 40)\n", + "print(f\"Title: {title}\")\n", + "print('*' * 40)\n", + "print(f\"Detail: {details}\")\n", + "print('*' * 40)\n", + "print(f\"Price: ${price}\")\n", + "print('*' * 40)\n", + "print(f\"Category: {category}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e05ed6e4-1cbc-46a4-be2f-4832b99e5ec3", + "metadata": {}, + "outputs": [], + "source": [ + "# The prompt that will be used during training\n", + "print(items[0].prompt)\n", + "print('*' * 40)\n", + "# The prompt that will be used during testing\n", + "print(items[0].test_prompt())" + ] + }, + { + "cell_type": "markdown", + "id": "f66e714d-2bae-458e-a0f6-1ce78d0696b3", + "metadata": {}, + "source": [ + "## 📊 Data Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd50ae2c-b34e-4be7-bd74-62055e4d5b2d", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(15, 6))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c736b038-2dcd-40b9-8ae9-d17271f1ff81", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of token counts\n", + "\n", + 
"tokens = [item.token_count for item in items]\n", + "plt.title(f\"Token counts: Avg {sum(tokens)/len(tokens):,.1f} and highest {max(tokens):,}\\n\")\n", + "plt.xlabel('Length (tokens)')\n", + "plt.ylabel('Count')\n", + "plt.hist(tokens, rwidth=0.7, color=\"blue\", bins=range(0, 300, 10))\n", + "plt.show()" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "940ba698", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da33633a-7ad5-479c-8dff-f7a7a149d49c", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of prices\n", + "\n", + "prices = [item.price for item in items]\n", + "plt.title(f\"Prices: Avg {sum(prices)/len(prices):,.1f} and highest {max(prices):,}\\n\")\n", + "plt.xlabel('Price ($)')\n", + "plt.ylabel('Count')\n", + "plt.hist(prices, rwidth=0.7, color=\"blueviolet\", bins=range(0, 1000, 10))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0f494d7-349e-4878-929c-075ac97c6b6d", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of categories\n", + "\n", + "category_counts = Counter()\n", + "for item in items:\n", + " category_counts[item.category]+=1\n", + "\n", + "categories = category_counts.keys()\n", + "counts = [category_counts[category] for category in categories]\n", + "\n", + "# Bar chart by category\n", + "plt.bar(categories, counts, color=\"goldenrod\")\n", + "plt.title('How many items in each category')\n", + "plt.xlabel('Categories')\n", + "plt.ylabel('Count')\n", + "\n", + "plt.xticks(rotation=30, ha='right')\n", + "\n", + "# Add value labels on top of each bar\n", + "for i, v in enumerate(counts):\n", + " plt.text(i, v, f\"{v:,}\", ha='center', va='bottom')\n", + "\n", + "# Display the chart\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d4fe384d-049b-4742-98e5-20d162db5151", + 
"metadata": {}, + "source": [ + "## 🎯 Data Sampling\n", + "\n", + "We sample to keep the dataset balanced but rich:\n", + "- 🎯 Keep all items if price ≥ $240 or group size ≤ 1200\n", + "- 🎯 For large groups, randomly sample 1200 items, favoring rare categories\n", + "\n", + "✅ This keeps valuable high-price items and avoids overrepresented classes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20330037-744d-4834-8ece-413a8dbe2030", + "metadata": {}, + "outputs": [], + "source": [ + "HEAVY_DATASET = \"Automotive\"\n", + "\n", + "# Group items by rounded price\n", + "# Slots is a dictionary where the keys are rounded prices and the values are lists of items that have that rounded price\n", + "slots = defaultdict(list)\n", + "for item in items:\n", + " slots[round(item.price)].append(item)\n", + "\n", + "np.random.seed(42) # Set random seed for reproducibility\n", + "sample = [] # Final collection of items after our sampling process completes\n", + "\n", + "# Sampling loop\n", + "for price, items_at_price in slots.items():\n", + "\n", + " # Take all items if price ≥ 240 or small group\n", + " if price >= 240 or len(items_at_price) <= 1200:\n", + " sample.extend(items_at_price)\n", + "\n", + " # Otherwise sample 1200 items with weights\n", + " else:\n", + "\n", + " # Weight: 1 for the heavy dataset, 5 for others\n", + " weights = [1 if item.category == HEAVY_DATASET else 5 for item in items_at_price]\n", + " weights = np.array(weights) / sum(weights)\n", + "\n", + " indices = np.random.choice(len(items_at_price), 1200, False, weights) # False = don't pick the same index twice\n", + " sample.extend([items_at_price[i] for i in indices])\n", + "\n", + "print(f\"There are {len(sample):,} items in the sample\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21aed337-6f15-48e4-8155-70551ed1d5e0", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of prices in the sample\n", + "\n", + "prices = [float(item.price) 
for item in sample]\n", + "plt.title(f\"Avg {sum(prices)/len(prices):.2f} and highest {max(prices):,.2f}\\n\")\n", + "plt.xlabel('Price ($)')\n", + "plt.ylabel('Count')\n", + "plt.hist(prices, rwidth=0.7, color=\"darkblue\", bins=range(0, 1000, 10))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08a7353e-2752-4493-bb0b-6057d1eab16d", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of categories in the sample\n", + "\n", + "category_counts = Counter()\n", + "for item in sample:\n", + " category_counts[item.category]+=1\n", + "\n", + "categories = category_counts.keys()\n", + "counts = [category_counts[category] for category in categories]\n", + "\n", + "# Create bar chart\n", + "plt.bar(categories, counts, color=\"pink\")\n", + "\n", + "# Customize the chart\n", + "plt.title('How many in each category')\n", + "plt.xlabel('Categories')\n", + "plt.ylabel('Count')\n", + "\n", + "plt.xticks(rotation=30, ha='right')\n", + "\n", + "# Add value labels on top of each bar\n", + "for i, v in enumerate(counts):\n", + " plt.text(i, v, f\"{v:,}\", ha='center', va='bottom')\n", + "\n", + "# Display the chart\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "9bdb0c58-24e0-4ab5-8a28-2136b53ab915", + "metadata": {}, + "source": [ + "The HEAVY_DATASET category is still in the lead, but the balance has improved somewhat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ce8ff80-cd19-4c3b-965f-ce6af8ee347d", + "metadata": {}, + "outputs": [], + "source": [ + "# Create pie chart\n", + "\n", + "fig, ax = plt.subplots(figsize=(8, 8))\n", + "wedges, texts, autotexts = ax.pie(\n", + " counts,\n", + " # labels=categories,\n", + " autopct='%1.0f%%',\n", + " startangle=90,\n", + " pctdistance=0.85,\n", + " labeldistance=1.1\n", + ")\n", + "ax.legend(wedges, categories, title=\"Categories\", loc=\"lower center\", bbox_to_anchor=(0.5, 1.15), ncol=3)\n", + "\n", + "# Draw donut center\n", + "centre_circle = 
plt.Circle((0, 0), 0.70, fc='white')\n", + "fig.gca().add_artist(centre_circle)\n", + "\n", + "# Add center label\n", + "ax.text(0, 0, \"Categories\", ha='center', va='center', fontsize=14, fontweight='bold')\n", + "\n", + "# Equal aspect ratio\n", + "plt.axis('equal')\n", + "plt.title(\"Category Distribution\")\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acbc6beb-fab4-49ab-bc7e-243638c1fa99", + "metadata": {}, + "outputs": [], + "source": [ + "# How does the price vary with the character count of the prompt?\n", + "\n", + "sizes = [len(item.prompt) for item in sample]\n", + "prices = [item.price for item in sample]\n", + "\n", + "# Create the scatter plot\n", + "plt.scatter(sizes, prices, s=0.2, color=\"red\")\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('Size')\n", + "plt.ylabel('Price')\n", + "plt.title('Is there a simple correlation between prompt length and item price?')\n", + "\n", + "# Display the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "76b060a4-0b8d-495c-bb96-28cb7b7ec623", + "metadata": {}, + "source": [ + "There is no strong or simple correlation between prompt length and item price.\n", + "\n", + "In other words, longer prompts don’t clearly mean higher prices, and vice versa." 
+ ] + }, + { + "cell_type": "markdown", + "id": "0f33211c-3548-4a21-990b-21aa55089186", + "metadata": {}, + "source": [ + "## ✅ Final Check Before Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be8d0c68-ac6e-4a4d-a6c7-64e9c6763ec4", + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure the price label is correctly placed by the end of the prompt\n", + "\n", + "def report(item):\n", + " prompt = item.prompt\n", + " tokens = Item.tokenizer.encode(item.prompt)\n", + " print(prompt)\n", + " print(tokens[-6:])\n", + " print(Item.tokenizer.batch_decode(tokens[-6:]))\n", + "\n", + "report(sample[50])" + ] + }, + { + "cell_type": "markdown", + "id": "656d523d-8297-4d75-a973-a7e5517d21bc", + "metadata": {}, + "source": [ + "LLaMA and GPT-4o both tokenize numbers from 1 to 999 as a single token, while models like Qwen2, Gemma, and Phi-3 split them into multiple tokens. This helps keep prices compact in our prompts — useful for our project, though not strictly required." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e36254ba-d20f-44ad-b991-1f1f3cdc4aaa", + "metadata": {}, + "source": [ + "## 📦 Creating Train/Test Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cfb5092-c38d-4c14-8dd0-e1d97c06d7f6", + "metadata": {}, + "outputs": [], + "source": [ + "random.seed(42)\n", + "random.shuffle(sample)\n", + "train = sample[:400_000]\n", + "test = sample[400_000:402_000]\n", + "print(f\"Divided into a training set of {len(train):,} items and test set of {len(test):,} items\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f084822-e489-4946-8cf5-f5b0ebd7a23c", + "metadata": {}, + "outputs": [], + "source": [ + "print(train[0].prompt)\n", + "print('*' * 40)\n", + "print(test[0].test_prompt())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d49a08ce-dd41-4af8-82f6-4701628e8152", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of prices in the first 250 test points\n", + "\n", + "prices = [float(item.price) for item in test[:250]]\n", + "plt.figure(figsize=(15, 6))\n", + "plt.title(f\"Avg {sum(prices)/len(prices):.2f} and highest {max(prices):,.2f}\\n\")\n", + "plt.xlabel('Price ($)')\n", + "plt.ylabel('Count')\n", + "plt.hist(prices, rwidth=0.7, color=\"darkblue\", bins=range(0, 1000, 10))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c581439-93f2-422a-924f-fd6c58ef8693", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract prompts and prices\n", + "train_prompts = [item.prompt for item in train]\n", + "train_prices = [item.price for item in train]\n", + "test_prompts = [item.test_prompt() for item in test]\n", + "test_prices = [item.price for item in test]\n", + "\n", + "# Create Hugging Face datasets\n", + "train_dataset = Dataset.from_dict({\"text\": train_prompts, \"price\": train_prices})\n", + "test_dataset = Dataset.from_dict({\"text\": test_prompts, \"price\": 
test_prices})\n", + "dataset = DatasetDict({\n", + " \"train\": train_dataset,\n", + " \"test\": test_dataset\n", + "})\n", + "\n", + "# Save full Item objects\n", + "os.makedirs(\"data\", exist_ok=True) # Make sure the folder exists\n", + "\n", + "# Save full Item objects to the folder\n", + "with open('data/train.pkl', 'wb') as file:\n", + " pickle.dump(train, file)\n", + "\n", + "with open('data/test.pkl', 'wb') as file:\n", + " pickle.dump(test, file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3914d029-350e-4140-a31f-e931fa289a41", + "metadata": {}, + "outputs": [], + "source": [ + "# Push to the Hugging Face Hub\n", + "USERNAME = \"lisekarimi\" # 🔧 Replace with your Hugging Face username\n", + "DATASET_NAME = f\"{USERNAME}/pricer-data\"\n", + "\n", + "dataset.push_to_hub(DATASET_NAME, private=True)" + ] + }, + { + "cell_type": "markdown", + "id": "3d8f3b33-41f8-4ee6-96ed-27677ffc8ec4", + "metadata": {}, + "source": [ + "**Note:** \n", + "- The dataset `pricer-data` on Hugging Face only contains `text` and `price`:\n", + "\n", + "\n", + "{\n", + " \"text\": \"How much does this cost...Price is $175.00\",\n", + " \"price\": 175.0\n", + "}\n", + "\n", + "- Full `Item` objects (with metadata) are available in `train.pkl` and `test.pkl`:\n", + "\n", + "Item(data={\n", + " \"title\": str,\n", + " \"description\": list[str],\n", + " \"features\": list[str],\n", + " \"details\": str\n", + "}, price=float)\n", + "\n", + "\n", + "Now, it’s time to move on to **Part 2: Model Benchmarking – Traditional ML vs Frontier LLMs.**\n", + "\n", + "🔜 See you in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part2_tradml_vs_frontier.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week6/community-contributions/lisekarimi/09_part2_tradml_vs_frontier.ipynb b/week6/community-contributions/lisekarimi/09_part2_tradml_vs_frontier.ipynb new file mode 100644 index 0000000..b4a8f14 --- /dev/null +++ b/week6/community-contributions/lisekarimi/09_part2_tradml_vs_frontier.ipynb @@ -0,0 +1,779 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d9b9eaa6-a12f-4cf8-a4c5-e8ac2c15d15b", + "metadata": {}, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 2)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- ➡️ Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA \n", + "- Summary & Leaderboard\n", + "\n", + "--- \n", + "\n", + "# ⚔️ Part 2: Traditional ML vs LLMs\n", + "\n", + "- Tasks:\n", + " - Vectorize text (BoW, Word2Vec)\n", + " - Train SVR, LR, XGBoost models\n", + " - Predict with LLMs (GPT-4o, Claude, LLaMA…)\n", + " - Compare traditional ML vs LLMs\n", + " \n", + "📊 Which model predicts prices best? 
Let’s find out.\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ✅ CPU is sufficient — no GPU required\n", + "- 🛠️ Requirements: 🔑 HF Token, OpenAI API key, Anthropic API key, Groq API key\n", + "\n", + "⚠️ This notebook assumes you're familiar with NLP techniques (e.g., converting text to vectors using Bag-of-Words or Word2Vec) and traditional ML models (like SVR, Linear Regression, XGBoost) along with basic evaluation metrics.\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ce6a892-b357-4132-b9c0-a3142a0244c8", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import importlib\n", + "import re\n", + "import csv\n", + "import tiktoken\n", + "import math\n", + "from datasets import load_dataset\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from gensim.models import Word2Vec\n", + "from gensim.utils import simple_preprocess\n", + "from sklearn.svm import LinearSVR\n", + "import xgboost as xgb\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "from anthropic import Anthropic" + ] + }, + { + "cell_type": "markdown", + "id": "6f82b230-2e03-4b1e-9be5-926fcd19acbe", + "metadata": {}, + "source": [ + "## 📥 Load Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4292a45d", + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55f1495b-f343-4152-8739-3a99f5ac405d", + "metadata": {}, + "outputs": [], + "source": [ + "HF_USER = \"lisekarimi\"\n", + "DATASET_NAME = 
f\"{HF_USER}/pricer-data\"\n", + "\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85880d79-f1ba-4ee8-a039-b6acea84562c", + "metadata": {}, + "outputs": [], + "source": [ + "print(train[0][\"text\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88842541-d73b-4fae-a550-6dedf8fab633", + "metadata": {}, + "outputs": [], + "source": [ + "print(train[0][\"price\"])" + ] + }, + { + "cell_type": "markdown", + "id": "1e3501c5-a52d-4ace-a988-b86b7e7dbb31", + "metadata": {}, + "source": [ + "## 🛠️ Prepare Data for models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a87cd82-127b-4a66-9ad9-90978a2376b5", + "metadata": {}, + "outputs": [], + "source": [ + "def mask_price_value(text):\n", + " return re.sub(r\"(\\n\\nPrice is \\$).*\", r\"\\1\", text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84ad6155-2708-4810-80a6-7efcf3bbd886", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract prices\n", + "prices = np.array([float(datapoint[\"price\"]) for datapoint in train])\n", + "\n", + "# Extract cleaned prompts\n", + "documents = [mask_price_value(datapoint[\"text\"]) for datapoint in train]\n", + "\n", + "# Set random seed for reproducibility\n", + "np.random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1c82371-5e92-4354-a064-38db1b6a8339", + "metadata": {}, + "outputs": [], + "source": [ + "print(documents[0])" + ] + }, + { + "cell_type": "markdown", + "id": "f05dd862-cc64-43d3-a0c3-c3a16d66e1bf", + "metadata": {}, + "source": [ + "## 📊 Model Evaluation with testing.py\n", + "\n", + "- Runs predictions and computes errors on test data\n", + "- Metrics: Absolute error, RMSLE, and hit rate\n", + "- Visual: Scatter plot of predicted vs. 
actual prices (color-coded)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45965754-7107-4023-bb33-81730b73db2e", + "metadata": {}, + "outputs": [], + "source": [ + "import helpers.testing\n", + "importlib.reload(helpers.testing)\n", + "\n", + "from helpers.testing import Tester # noqa: E402\n", + "\n", + "results = {} # Store each model's tester to compare and find the best performer" + ] + }, + { + "cell_type": "markdown", + "id": "2d8b08a8-f0a3-468f-91ea-7da60aecc32a", + "metadata": {}, + "source": [ + "## 🎯 Price Prediction with Traditional ML" + ] + }, + { + "cell_type": "markdown", + "id": "35475efe-0751-443a-9605-89e2025c3eb4", + "metadata": {}, + "source": [ + "## Bag-of-Words + Linear Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ded239d6-4dca-439b-8748-67aa2d2fa2a9", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the CountVectorizer for a Bag of Words model\n", + "vectorizer = CountVectorizer(max_features=1000, stop_words='english')\n", + "X = vectorizer.fit_transform(documents)\n", + "regressor = LinearRegression()\n", + "regressor.fit(X, prices)\n", + "\n", + "def bow_lr_pricer(datapoint):\n", + " x = vectorizer.transform([mask_price_value(datapoint[\"text\"])])\n", + " return max(regressor.predict(x)[0], 0)\n", + "\n", + "tester = Tester(bow_lr_pricer, test)\n", + "tester.run()\n", + "results[\"Bag of Words LR\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "4b861fe5", + "metadata": {}, + "source": [ + "Bow Lr Pricer Error=$121.23 RMSLE=0.98 Hits=27.2%" + ] + }, + { + "cell_type": "markdown", + "id": "25dfc7c6-a258-4b56-8c02-f01003c4674d", + "metadata": {}, + "source": [ + "## Word2Vec + Linear Regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efa22fd1-e81d-4142-b0a1-f1399c7a98a3", + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocess the documents\n", + "processed_docs = [simple_preprocess(doc) for doc in documents]\n", 
+ "\n", + "# Train Word2Vec model\n", + "w2v_model = Word2Vec(sentences=processed_docs, vector_size=400, window=5, min_count=1, workers=4)\n", + "\n", + "# This step of averaging vectors across the document is a weakness in our approach\n", + "\n", + "def document_vector(doc):\n", + " doc_words = simple_preprocess(doc)\n", + " word_vectors = [w2v_model.wv[word] for word in doc_words if word in w2v_model.wv]\n", + " return np.mean(word_vectors, axis=0) if word_vectors else np.zeros(w2v_model.vector_size)\n", + "\n", + "# Create feature matrix\n", + "X_w2v = np.array([document_vector(doc) for doc in documents])\n", + "\n", + "# Run Linear Regression on word2vec\n", + "\n", + "word2vec_lr_regressor = LinearRegression()\n", + "word2vec_lr_regressor.fit(X_w2v, prices)\n", + "\n", + "def word2vec_lr_pricer(datapoint):\n", + " doc = mask_price_value(datapoint[\"text\"])\n", + " vec = document_vector(doc)\n", + " return max(0, word2vec_lr_regressor.predict([vec])[0])\n", + "\n", + "tester = Tester(word2vec_lr_pricer, test)\n", + "tester.run()\n", + "results[\"Word2Vec LR\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "daaf6101", + "metadata": {}, + "source": [ + "Word2Vec Lr Pricer Error=$127.42 RMSLE=0.97 Hits=27.6%" + ] + }, + { + "cell_type": "markdown", + "id": "5f1fe808-f80e-4d15-8ec7-d31710cf68c5", + "metadata": {}, + "source": [ + "## Word2Vec + Linear SVR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35d01455-6619-4f29-8f95-90c03763407e", + "metadata": {}, + "outputs": [], + "source": [ + "svr_regressor = LinearSVR()\n", + "svr_regressor.fit(X_w2v, prices)\n", + "\n", + "def svr_pricer(datapoint):\n", + " np.random.seed(42)\n", + " doc = mask_price_value(datapoint[\"text\"])\n", + " doc_vector = document_vector(doc)\n", + " return max(float(svr_regressor.predict([doc_vector])[0]),0)\n", + "\n", + "tester = Tester(svr_pricer, test)\n", + "tester.run()\n", + "results[\"Word2Vec SVR\"] = tester" + ] + }, + { + "cell_type": 
"markdown", + "id": "48cb9c88", + "metadata": {}, + "source": [ + "Svr Pricer Error=$124.24 RMSLE=0.98 Hits=28.4%" + ] + }, + { + "cell_type": "markdown", + "id": "469ca205-3e5e-4aca-8b77-53f6acd92e40", + "metadata": {}, + "source": [ + "## Word2Vec + XGBoost " + ] + }, + { + "cell_type": "markdown", + "id": "a55acfe0-9633-45aa-a4c4-96b434a5a43b", + "metadata": {}, + "source": [ + "I initially tried Random Forest, but it struggled with high training time and didn’t scale well with this data.\n", + "That’s why I opted for XGBoost — it’s faster, handles large datasets efficiently, and often delivers better performance on structured data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e3e1d7-2e62-4866-924e-7ed4483db8bc", + "metadata": {}, + "outputs": [], + "source": [ + "xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42, n_jobs=-1, verbosity=0)\n", + "xgb_model.fit(X_w2v, prices)\n", + "\n", + "def xgboost_pricer(datapoint):\n", + " doc = mask_price_value(datapoint[\"text\"])\n", + " doc_vector = document_vector(doc)\n", + " return max(0, xgb_model.predict([doc_vector])[0])\n", + "\n", + "tester = Tester(xgboost_pricer, test)\n", + "tester.run()\n", + "results[\"Word2Vec XGBoost\"] = tester\n" + ] + }, + { + "cell_type": "markdown", + "id": "d35050fa", + "metadata": {}, + "source": [ + "Xgboost Pricer Error=$107.97 RMSLE=0.84 Hits=29.2%" + ] + }, + { + "cell_type": "markdown", + "id": "4db1051d-9a7e-4cec-87fc-0d77fd858ced", + "metadata": {}, + "source": [ + "## 🚀 Price Prediction with Frontier LLMs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ef3fa58-87b7-4c30-8088-1a4999f0d25a", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "\n", + "# Get API keys from environment\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if not openai_api_key:\n", + " print(\"❌ OPENAI_API_KEY is missing\")\n", + "\n", + "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", 
+ "if not anthropic_api_key:\n", + " print(\"❌ ANTHROPIC_API_KEY is missing\")\n", + "\n", + "groq_api_key = os.getenv('GROQ_API_KEY')\n", + "if not groq_api_key:\n", + " print(\"❌ GROQ_API_KEY is missing\")\n", + "\n", + "# Initialize clients\n", + "openai = OpenAI(api_key=openai_api_key)\n", + "claude = Anthropic(api_key=anthropic_api_key)\n", + "groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d86b3bca-513b-4621-8c66-4b89c134b895", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(datapoint):\n", + " system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n", + " user_prompt = mask_price_value(datapoint[\"text\"]).replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\",\"\")\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": \"Price is $\"}\n", + " ]\n", + "\n", + "messages_for(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f502d428-98aa-4160-bebe-726efcce5c65", + "metadata": {}, + "outputs": [], + "source": [ + "# A utility function to extract the price from a string\n", + "\n", + "def get_price(s):\n", + " s = s.replace('$','').replace(',','')\n", + " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n", + " return float(match.group()) if match else 0\n", + "\n", + "get_price(\"The price is roughly $99.99 because blah blah\") # Testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3845eda0-d37d-4605-a00f-83b1d8fc6945", + "metadata": {}, + "outputs": [], + "source": [ + "# A utility function to Count the tokens before passing the prompt to the model\n", + "\n", + "def count_tokens(messages):\n", + " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + " token_count = 
sum(len(encoding.encode(message['content'])) for message in messages)\n", + " return token_count\n" + ] + }, + { + "cell_type": "markdown", + "id": "4737e678-5d57-4dee-984b-ae5c56f9542d", + "metadata": {}, + "source": [ + "### gpt-4o-mini" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dca067d0-a4ff-4a48-bb74-d2914f3704b7", + "metadata": {}, + "outputs": [], + "source": [ + "# Count tokens once before running\n", + "total_tokens = 0\n", + "for datapoint in train:\n", + " messages = messages_for(datapoint)\n", + " total_tokens += count_tokens(messages)\n", + "print(f\"Total tokens: {total_tokens}\")\n", + "\n", + "def gpt_4o_mini(datapoint):\n", + " messages = messages_for(datapoint)\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=messages,\n", + " seed=42,\n", + " max_tokens=5\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)\n", + "\n", + "tester = Tester(gpt_4o_mini, test)\n", + "tester.run()\n", + "results[\"gpt 4o mini\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "5c4c8ee4", + "metadata": {}, + "source": [ + "Gpt 4o Mini Error=$99.30 RMSLE=0.75 Hits=44.8%" + ] + }, + { + "cell_type": "markdown", + "id": "00a72937-9cde-472c-bd22-84996a42ab4c", + "metadata": {}, + "source": [ + "### gpt 4o (the big guy 😎)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20d18e1a-ccbf-4481-84ed-16b1c5760176", + "metadata": {}, + "outputs": [], + "source": [ + "def gpt_4o_frontier(datapoint):\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o\",\n", + " messages=messages_for(datapoint),\n", + " seed=42,\n", + " max_tokens=5\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)\n", + "\n", + "tester = Tester(gpt_4o_frontier, test)\n", + "tester.run()\n", + "results[\"gpt 4o (the big guy)\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "0c307928", + 
"metadata": {}, + "source": [ + "Gpt 4O Frontier Error=$87.68 RMSLE=1.01 Hits=51.2%" + ] + }, + { + "cell_type": "markdown", + "id": "20af42a7-8889-4091-bee9-80aeaf63816f", + "metadata": {}, + "source": [ + "### claude 3.7 Sonnet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4e343ef-2097-4395-86b2-90c489f133fe", + "metadata": {}, + "outputs": [], + "source": [ + "def claude_3_point_7_sonnet(datapoint):\n", + " messages = messages_for(datapoint)\n", + " system_message = messages[0]['content']\n", + " messages = messages[1:]\n", + " response = claude.messages.create(\n", + " model=\"claude-3-7-sonnet-20250219\",\n", + " max_tokens=5,\n", + " system=system_message,\n", + " messages=messages\n", + " )\n", + " reply = response.content[0].text\n", + " return get_price(reply)\n", + "\n", + "tester = Tester(claude_3_point_7_sonnet, test)\n", + "tester.run()\n", + "results[\"claude 3.7 sonnet\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "fdbba849", + "metadata": {}, + "source": [ + "Claude 3 Point 7 Sonnet Error=$110.26 RMSLE=0.60 Hits=46.0%" + ] + }, + { + "cell_type": "markdown", + "id": "0ff3a6bd-99b8-438e-abc1-295bf0bb9961", + "metadata": {}, + "source": [ + "### groq model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58a0c852-0811-4156-9c08-fa5bf4b54cd2", + "metadata": {}, + "outputs": [], + "source": [ + "def llama3_groq_pricer(datapoint):\n", + " response = groq.chat.completions.create(\n", + " model=\"llama3-70b-8192\",\n", + " messages=messages_for(datapoint),\n", + " max_tokens=5,\n", + " seed=42\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)\n", + "\n", + "tester = Tester(llama3_groq_pricer, test)\n", + "tester.run()\n", + "results[\"llama3-70b-8192\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "daf7f96c", + "metadata": {}, + "source": [ + "Llama3 Groq Pricer Error=$122.95 RMSLE=0.73 Hits=44.8%" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "b4cd8f25-9a8d-4227-ba58-c163b4d601cb", + "metadata": {}, + "outputs": [], + "source": [ + "def deepseek_qwen_pricer(datapoint):\n", + " response = groq.chat.completions.create(\n", + " model=\"deepseek-r1-distill-qwen-32b\",\n", + " messages=messages_for(datapoint),\n", + " max_tokens=5,\n", + " seed=42\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)\n", + "\n", + "tester = Tester(deepseek_qwen_pricer, test)\n", + "tester.run()\n", + "results[\"deepseek-qwen-32b\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "7280870e", + "metadata": {}, + "source": [ + "Deepseek Qwen Pricer Error=$178.96 RMSLE=0.83 Hits=33.2%" + ] + }, + { + "cell_type": "markdown", + "id": "af7d0190-d89b-4525-8a34-21033e99abb0", + "metadata": {}, + "source": [ + "## 🕵️ Human Judgement Baseline (Ed)\n", + "\n", + "We include a human baseline from our instructor Ed, who manually estimated prices based on item descriptions (💪 thanks Ed for taking on this exhausting task!). This allows us to compare model performance against human intuition." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d63970d-a2d2-4329-8fe7-d0bdc2ff1bcb", + "metadata": {}, + "outputs": [], + "source": [ + "human_predictions = []\n", + "\n", + "with open('data/human_output.csv', 'r', encoding=\"utf-8\") as csvfile:\n", + " reader = csv.reader(csvfile)\n", + " for row in reader:\n", + " human_predictions.append(float(row[1]))\n", + "\n", + "def human_pricer(datapoint):\n", + " # `Tester` runs in order, so use the index from Tester itself\n", + " idx = human_pricer.counter\n", + " human_pricer.counter += 1\n", + " return human_predictions[idx]\n", + "\n", + "human_pricer.counter = 0 # initialize counter\n", + "\n", + "tester = Tester(human_pricer, test)\n", + "tester.run()\n", + "results[\"Human Predictions\"] = tester" + ] + }, + { + "cell_type": "markdown", + "id": "08c0d367-d596-43e6-81af-5889691fa34b", + "metadata": {}, + "source": [ + "## 🥇 Benchmark Showdown: ML, LLMs, and Ed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "830ae9a5-a185-41af-b17f-8d6a3f3752b7", + "metadata": {}, + "outputs": [], + "source": [ + "def truncate(x, decimals=2):\n", + " factor = 10 ** decimals\n", + " return math.floor(x * factor) / factor\n", + "\n", + "df_results = []\n", + "\n", + "for model_name, tester in results.items():\n", + " avg_error = truncate(sum(tester.errors) / tester.size)\n", + " hit_percent = truncate(sum(1 for c in tester.colors if c == \"green\") / tester.size * 100)\n", + " rmsle = truncate(math.sqrt(sum(tester.sles) / tester.size))\n", + "\n", + " df_results.append({\n", + " \"model\": model_name,\n", + " \"avrg_error\": avg_error,\n", + " \"rmsle\": rmsle,\n", + " \"accuracy_%\": hit_percent\n", + " })\n", + "\n", + "df_results = pd.DataFrame(df_results)\n", + "df_results = df_results.sort_values(by=\"avrg_error\")\n", + "\n", + "# Display with .2f formatting\n", + "print(df_results.to_string(index=False, float_format=\"{:.2f}\".format))\n" + ] + }, + { + "attachments": { + 
"image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "e78ddc21-1ffc-431b-902b-4562bdd4e789", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)\n", + "\n", + "🏁 **GPT-4o, GPT-4o Mini and XGBoost** clearly outperformed both LLMs (like Claude 3.7, LLaMA3-70B, DeepSeek-32B) and traditional ML approaches (LR, SVR).\n", + "\n", + "Now let’s take the top-performing frontier LLM — **GPT-4o Mini** — to test if retrieval (RAG) boosts its performance, and the best ML model — **XGBoost** — to see if contextual embeddings enhance its predictions.\n", + "\n", + "Let’s find out.\n", + "\n", + "🔜 See you in the [next notebook](https://github.com/lisek75/nlp_llms_notebook/blob/main/09_part3_e5embeddings_rag.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week6/community-contributions/lisekarimi/09_part3_e5embeddings_rag.ipynb b/week6/community-contributions/lisekarimi/09_part3_e5embeddings_rag.ipynb new file mode 100644 index 0000000..5e6eea0 --- /dev/null +++ b/week6/community-contributions/lisekarimi/09_part3_e5embeddings_rag.ipynb @@ -0,0 +1,1080 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d9b9eaa6-a12f-4cf8-a4c5-e8ac2c15d15b", + "metadata": { + "id": "d9b9eaa6-a12f-4cf8-a4c5-e8ac2c15d15b" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 3)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- ➡️E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating 
Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🧠 Part 3: E5 Embeddings & RAG\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required for embeddings (400K items) - use Google Colab\n", + "- 🛠️ Requirements: 🔑 HF Token, OpenAI API Key\n", + "- Tasks:\n", + " - Preprocessed item descriptions\n", + " - Generated and stored embeddings in ChromaDB\n", + " - Trained XGBoost on embeddings, pushed to HF Hub, and ran predictions\n", + " - Predicted prices with GPT-4o Mini using RAG\n", + "\n", + "Is Word2Vec enough for XGBoost, or do contextual E5 embeddings perform better?\n", + "\n", + "Does retrieval improve price prediction for GPT-4o Mini?\n", + "\n", + "Let’s find out.\n", + "\n", + "⚠️ This notebook assumes basic familiarity with RAG and contextual embeddings.\n", + "We use the same E5 embedding space for both XGBoost and GPT-4o Mini with RAG, enabling a fair comparison.\n", + "Embeddings are stored and queried via ChromaDB — no LangChain is used for creation or retrieval.\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8e2af5e-03cc-46dc-8a8b-37cb102d0e92", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d8e2af5e-03cc-46dc-8a8b-37cb102d0e92", + "outputId": "905907cc-81c5-4a3b-e7c8-9e237e594a09" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q tqdm huggingface_hub numpy sentence-transformers datasets chromadb xgboost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ce6a892-b357-4132-b9c0-a3142a0244c8", + "metadata": { + "id": "4ce6a892-b357-4132-b9c0-a3142a0244c8" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import math\n", + "import chromadb\n", + "import re\n", + "import joblib\n", + "import os\n", + "from tqdm import tqdm\n", + "import 
gc\n", + "from huggingface_hub import login, HfApi\n", + "import numpy as np\n", + "from sentence_transformers import SentenceTransformer\n", + "from datasets import load_dataset\n", + "from google.colab import userdata\n", + "from xgboost import XGBRegressor\n", + "from openai import OpenAI\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "yBH-mvV0QBiw", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yBH-mvV0QBiw", + "outputId": "b4b6df10-dc05-4dbe-dd8b-55bae5a2b7af" + }, + "outputs": [], + "source": [ + "# Mount Google Drive to access persistent storage\n", + "\n", + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3OUI1jQYyaeX", + "metadata": { + "id": "3OUI1jQYyaeX" + }, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n", + "hf_token = userdata.get('HF_TOKEN')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99f6f632", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI(api_key=openai_api_key)\n", + "login(hf_token, add_to_git_credential=True)\n", + "\n", + "# Configuration\n", + "ROOT = \"/content/drive/MyDrive/deal_finder\"\n", + "CHROMA_PATH = f\"{ROOT}/chroma\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "FF-HryRnDXm5", + "metadata": { + "id": "FF-HryRnDXm5" + }, + "outputs": [], + "source": [ + "# Helper class for evaluating model predictions\n", + "\n", + "GREEN = \"\\033[92m\"\n", + "YELLOW = \"\\033[93m\"\n", + "RED = \"\\033[91m\"\n", + "RESET = \"\\033[0m\"\n", + "COLOR_MAP = {\"red\":RED, \"orange\": YELLOW, \"green\": GREEN}\n", + "\n", + "class Tester:\n", + "\n", + " def __init__(self, predictor, data, title=None, 
size=250):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or predictor.__name__.replace(\"_\", \" \").title()\n", + " self.size = size\n", + " self.guesses = []\n", + " self.truths = []\n", + " self.errors = []\n", + " self.sles = []\n", + " self.colors = []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error<40 or error/truth < 0.2:\n", + " return \"green\"\n", + " elif error<80 or error/truth < 0.4:\n", + " return \"orange\"\n", + " else:\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " datapoint = self.data[i]\n", + " guess = self.predictor(datapoint)\n", + " truth = datapoint[\"price\"]\n", + " error = abs(guess - truth)\n", + " log_error = math.log(truth+1) - math.log(guess+1)\n", + " sle = log_error ** 2\n", + " color = self.color_for(error, truth)\n", + " # title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " # print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n", + "\n", + " def chart(self, title):\n", + " # max_error = max(self.errors)\n", + " plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Model Estimate')\n", + " plt.xlim(0, max_val)\n", + " plt.ylim(0, max_val)\n", + " plt.title(title)\n", + "\n", + " # Add color legend\n", + " from matplotlib.lines import Line2D\n", + " legend_elements = [\n", + " Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='Medium 
error (orange)', markerfacecolor='orange', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8)\n", + " ]\n", + " plt.legend(handles=legend_elements, loc='upper right')\n", + "\n", + " plt.show()\n", + "\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for color in self.colors if color==\"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n", + " self.chart(title)\n", + "\n", + " def run(self):\n", + " self.error = 0\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, function, data):\n", + " cls(function, data).run()\n" + ] + }, + { + "cell_type": "markdown", + "id": "6f82b230-2e03-4b1e-9be5-926fcd19acbe", + "metadata": { + "id": "6f82b230-2e03-4b1e-9be5-926fcd19acbe" + }, + "source": [ + "## 📥 Load Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ae00568", + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55f1495b-f343-4152-8739-3a99f5ac405d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "6e7c01d666f64fa58d6a059cc8d8f323", + "597b7155767441e6a0283a19edced00f", + "cf1360550eaa49a0867f55db8b8c4c77", + "94f26137cccf47f6a36d9325bc8f5b9c", + "a764b97f3dcd480c8860dde979e5e114", + "f1ec9a46c9ce4e038f3051bbd1b2c661", + "992f46ae91554731987b4baf79ba1bbd", + "b4abe22402fe40fd82b7fe93b4bc06f3", + "57ec058518734e3dbd27324cbba243c0", + "f101230e8a9a431d85ee2f8e51add7ad", + "e196658b093746588113240a60336437", + 
"cb06a4d26cb84c708857b683d1e84c12", + "e82ad07ba22e465cbe0232c504c3b693", + "c4e0ed1165f54393aaec24cd4624d562", + "295a3c6662034aaaab4d2e0192d1d1ce", + "c38aff0c91a849feb547e78156c2c347", + "69647c5595874c3185cebf6813ee908c", + "1036b1af4b154916a3d4f16f5ed799eb", + "e6347ff832cc4c04aef86594ea5a9e64", + "01c63224aa6a4f0c9c88a4d85527e767", + "1db34b9a4f1f42a897345b5a6630ced6", + "9293f2d745024d7facb68e04cc188850", + "26f6ec91efaf42909cec172fafe55987", + "c1131f0324b0498da9bc59720e867eb6", + "3e58017527a04634a489a33ed53fd312", + "06cd89f57d08466c875d179e79e3ecd2", + "2e0aa0aa87a04419a277f303f577f7ff", + "8fa0fe1992db42a997e7cd3ee08bd09e", + "accb1d5142a9498da0117f746fedd691", + "fcc2fc2f82e2441995b9e61b23b9b91e", + "da93fe316dd24cb48538b52ef2eaf6b5", + "5cea58775faf41829c04d2a84e3e2c31", + "1914ec7959d143d09a55da324bbcd47b", + "a3d3504148df46f59b6770fb377e2bb6", + "b088b9a503e24f179741d40d21a730d9", + "b77dcf4632954d0c9c3b6d441c5f684d", + "4cc8b3c4d9934f24a94b4601ab7816b5", + "c093f1c0806a43b79594ddac856a301c", + "9f4d9ac1aa074ed6b0248a4b18fde7db", + "c00785b8fdda409e9cb435abbb0466da", + "612e211af4cd46eb9d2f3148d1c7cb0b", + "86f93c663cc446adbc6366a528cb01b0", + "dd42911451ec48e086c1c99e76492321", + "5b942241f11c4f2ab086f0f289f99a03", + "d28a5c6172f74c0f8bbd2d949455f22e", + "0e67b2055f214eb691b4b54d9431bdd8", + "f81c4dc72b3b4b40a6a70528db732482", + "043a355b6a85471ba0142eb25e2c9eb0", + "8682bfab79a8409499797a3307e4d64d", + "55a837644bb643ac864fa1a674e665c8", + "33aae5a98bf5433b813ff8216e015089", + "56eedfc5ba6642dc8443ab60f5f09b8c", + "a1b710c227a84ea1a55c310084f13a93", + "0d4bc0d0e88a4c77a202f9c11b2ee2a9", + "20858379c2cd45d59070b18149d6e925" + ] + }, + "id": "55f1495b-f343-4152-8739-3a99f5ac405d", + "outputId": "37317fe6-b560-4ad0-c7d6-66517fd67c42" + }, + "outputs": [], + "source": [ + "HF_USER = \"lisekarimi\"\n", + "DATASET_NAME = f\"{HF_USER}/pricer-data\"\n", + "\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = 
dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85880d79-f1ba-4ee8-a039-b6acea84562c", + "metadata": { + "id": "85880d79-f1ba-4ee8-a039-b6acea84562c" + }, + "outputs": [], + "source": [ + "print(train[0][\"text\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88842541-d73b-4fae-a550-6dedf8fab633", + "metadata": { + "id": "88842541-d73b-4fae-a550-6dedf8fab633" + }, + "outputs": [], + "source": [ + "print(train[0][\"price\"])" + ] + }, + { + "cell_type": "markdown", + "id": "7b8a9a5b-f74d-487d-a400-d157fea8c979", + "metadata": { + "id": "7b8a9a5b-f74d-487d-a400-d157fea8c979" + }, + "source": [ + "## 📦 Embed + Save Training Data to Chroma\n", + "- No LangChain used.\n", + "- We use `intfloat/e5-small-v2` for embeddings:\n", + " - Fast, high-quality, retrieval-tuned\n", + " - **Requires 'passage:' prefix**\n", + "- We embed item descriptions and store them in ChromaDB, with price saved as metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b95a87a8-2136-4e03-a36c-42e5d53a3e28", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 337, + "referenced_widgets": [ + "8216f5d45e9345e493a43b8cbbe6598a", + "ec3854658f8448fc8463e8635889f700", + "7a90822b2aff4d5cb926442f01a77a9b", + "9518c3af589744cfbbb51f87d68f216e", + "327044765c044384a14be4e660bb152f", + "0b773d68d2394d80a2baf73c1808752a", + "21568b9954c8411d863baa7385df624f", + "0a08828a0ba4430ea6e039949f220b5b", + "3d5a51cfb5f44eecbf80d46e2e4608fd", + "313f059a82104a9394182f6dcdb0bfb4", + "6a625748afc84fe89a8af7a4ef638675", + "ebe43cd30e414f31ab52614c6e9f9f2b", + "88c29992adaa44af857e3216f7e53e60", + "0528af78cef844e8a2b489dcb8fce049", + "8cbccd78a79447158f02caadfa7d805f", + "076ce072490c493ba5b3c431f6166eda", + "dd7780038f8a4cd3837972c78b6583bc", + "9e285e2b58934552b98edd998b82a678", + "338efda3245a4989a9b3ee0795949bb8", + "136dfb68394742ea98d9eb845730846c", + "891d821725b6457c9d06737bf75fe3ed", 
+ "14feb4e20339465d966a6a80504eb819", + "c02b637785324b9eb88e6a2c00cb986b", + "3635da14e6f04e8f90548eb6381290a8", + "1314757f404e47f5b0f6fa4de8537863", + "9e5f2478e931476d882e471c7f66aaeb", + "4ad885d69d9f492c960ca53426189707", + "992d5e88d7844a52a283c0e19475ab78", + "43eaec936c774e3380ae4ff1a823f3dc", + "ceeb11b317ac4d37b59641024f77265f", + "5e0371de53164830b4e8c2b6954b5947", + "63a729492e8a4a759d75b769cbb3e1e7", + "14dde2c87b7b4c9ea16d48732108dcd7", + "f50717b099d142be95390ae8f1e99e6a", + "ffa64c304dab4ef18e9ef50ac1625cd6", + "f358351612004f64adffb931c3130603", + "7593358526ae4a87bf4be0eb1bcfc076", + "51536b45f5674d498272dc7b2def635d", + "8fbe2a3fc07943e7bf0fdc927bab795a", + "6b265cc65d5a42638572c1776faafdb1", + "39fa86a7760d43c793eb8ef27475af7d", + "eee5113e2dd1402faf76d00f07d8e0af", + "6792ed7123724b2d8091bc8d36255e68", + "e35094b24c154340bb1b3ebba7ac0a0d", + "dd63bb6ffed34b6687a0c79d8af93fb7", + "32080bc9381c449ab63794655ec6d714", + "eb7aa289fefc465d98edeed9ce2bff51", + "53fae218b4b74863af5fe53a66a5f7ef", + "35bc6d95c60f4c3d8ddc6b3b0845ff7e", + "f4765ca278ad4da4b465bd2920a21320", + "7ac6ead5baef4f30aff170a30a9a7977", + "e7adb5eb38d54b29b734d207982411c8", + "8f4f51b75af74daa9b9ad6696760109c", + "ae4db932b7544c6cb9ff668fa954addd", + "be63f07eedbd4d46ac4913df45216108", + "2e47d9e7b36a4ec69a9071930671ae8e", + "7b1c7f9bf0e8412abb66bcfc24cf9668", + "5c8742d3f663470e9977d006e83314b7", + "74ec67e07ee0477eb41e21093ae82858", + "4b60a8f023bc4d759bc197b11bf4e160", + "7a090f162fa84568a5e486ba935c3ed1", + "8b650428a6834f5d8ebe62ad327493e0", + "5c4d22bce82546d28a8b0c041895c8e3", + "16121b830a2948afb3ca8eb54e27a678", + "0305a4b4408f4562b87b58098148326d", + "68f07b5b7ad447ce9a87023d872c2e73", + "2156a5ced089414c99a1bb8dd3a0b3b7", + "2e6cd134c70e455a85c47b1575135883", + "f4264985b5cc4a0f970a088fb90b8bcf", + "71d790bf25324e6dbb5372f636c53da9", + "dac3ba29ee4d4083a9abca7eab632534", + "5c75c020a1914da680340fe826f3f58d", + "195e6dfb82c84f0191838acbbfe38126", + 
"b06adcaf8d4c497897ed3625f3afb4eb", + "d4ab3971183a4e8fa10402e3542e6466", + "444ca1f5213241c2bc71fa9ebe9ac3ca", + "34d571f76ef845f4bc272a5e05491c31", + "e8ee76b022d64b2cb24a2cb7b61aeef7", + "8c9ac87788b04ae6899f3b62fdc3ed0d", + "431b638c435444c38e50a09573b8f31b", + "0430f22e24d14171b83261faa090f349", + "0fa5ae935a554461b086a4b81470b9ad", + "f072e665d27e442ab4d0e2eb33c98db9", + "fd3b1885c39c4b70b083d7fddf74d4b6", + "f77051cb151645559223ecf835426688", + "0e17661f878948598703ee7942e5e1a2", + "fca913c6cfff48099d1744d5b091fc46", + "085baf51ecef46318ceafbaba2bb4490", + "52309039c2d8421bbb8e99f63f5ba91f", + "f4233cd960ea4f549734a5b1e1da5e2e", + "42ce1b7765f547cd9ecd8b428ec1c718", + "e72a08514d3b42d2b5fbf87a920bcdf0", + "ad05cf4c0ed44341aa3cd2cbd22b513d", + "db9915d53d784b85accebe1552c4e7e1", + "9519b6d9bf1b45e3b56da4c28d2aeb2e", + "cfeb0597708b49fa9b65342e1ac446ae", + "e29617eff6fd4199a74b670198ba2a69", + "1cea197a15d94654a0e792318435d707", + "89dcb96670a8433593e3452fad3c9210", + "0802085388be453b8fe5edee7e0a01ef", + "1ed257f19b8b44ee85f09e10178ae52f", + "04107981561149cba5baf74ccba87aa6", + "09afb010020e4b2f91d7cdbdca316962", + "b11b51beaa54474cb7682110bd2d24ae", + "47822470ddf842cd9e3368090549a2b5", + "835bce5d87a2417c9b6a5b27627447dc", + "5ca06dd536d44de784984a492d23573f", + "8e75bdb4469e497c8f021ebde7c6c9b3", + "7f4d4f8ece1d4651a2186f10a0cc25a5", + "92036442af5f4b698f2a54ecba4650e2" + ] + }, + "id": "b95a87a8-2136-4e03-a36c-42e5d53a3e28", + "outputId": "6094328e-8c33-4b40-80e9-08c5cfb3e277" + }, + "outputs": [], + "source": [ + "# Load embedding model\n", + "model_embedding = SentenceTransformer(\"intfloat/e5-small-v2\", device='cuda')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "733cf41d-e81e-4cfc-b597-67da02dbc3cf", + "metadata": { + "id": "733cf41d-e81e-4cfc-b597-67da02dbc3cf" + }, + "outputs": [], + "source": [ + "# Init Chroma\n", + "client = chromadb.PersistentClient(path=CHROMA_PATH)\n", + "collection = 
client.get_or_create_collection(name=\"price_items\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f493c7d-1c72-40f9-a5c6-63c7f6b1cf2c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 91 + }, + "id": "1f493c7d-1c72-40f9-a5c6-63c7f6b1cf2c", + "outputId": "72627732-4eee-4d9a-c8cb-0c42e2541a80" + }, + "outputs": [], + "source": [ + "# Format description function (no price in text)\n", + "def description(item):\n", + " text = item[\"text\"].replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n", + " text = text.split(\"\\n\\nPrice is $\")[0]\n", + " return f\"passage: {text}\"\n", + "\n", + "description(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f44bf613-adf6-4993-bf7b-6aa9fad21a03", + "metadata": { + "id": "f44bf613-adf6-4993-bf7b-6aa9fad21a03" + }, + "outputs": [], + "source": [ + "batch_size = 300 # how many items to insert into Chroma at once\n", + "encode_batch_size = 1024 # how many items to encode at once in GPU memory\n", + "\n", + "for i in tqdm(range(0, len(train), batch_size), desc=\"Processing batches\"):\n", + "\n", + " end_idx = min(i + batch_size, len(train))\n", + "\n", + " # Collect documents and metadata\n", + " documents = [description(train[j]) for j in range(i, end_idx)]\n", + " metadatas = [{\"price\": train[j][\"price\"]} for j in range(i, end_idx)]\n", + " ids = [f\"doc_{j}\" for j in range(i, end_idx)]\n", + "\n", + " # GPU batch encoding\n", + " vectors = model_embedding.encode(\n", + " documents,\n", + " batch_size=encode_batch_size,\n", + " show_progress_bar=False,\n", + " normalize_embeddings=True\n", + " ).tolist()\n", + "\n", + " # Insert into Chroma\n", + " collection.add(\n", + " ids=ids,\n", + " documents=documents,\n", + " embeddings=vectors,\n", + " metadatas=metadatas\n", + " )\n", + "\n", + "print(\"✅ Embedding and storage to ChromaDB completed.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "f2e2ccc9-b772-45f7-8258-cbc4f9c3ed59", + "metadata": {}, + "outputs": [], + "source": [ + "# Now flush and clean\n", + "print(\"🧹 Cleaning up and saving ChromaDB...\")\n", + "client = None\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "id": "c35d2fab-583f-4527-a7cc-9d31214b2f35", + "metadata": {}, + "source": [ + "Our ChromaDB is currently saved in a persistent Google Drive path; for a production-ready app, we recommend uploading it to AWS S3 for better reliability and scalability.\n", + "\n", + "🧩 Now that we've generated the E5 embeddings, let's use them for both **XGBoost regression** and **GPT-4o Mini with RAG** ." + ] + }, + { + "cell_type": "markdown", + "id": "40e4c587-211d-4bc0-91cf-6267f45405d6", + "metadata": { + "id": "40e4c587-211d-4bc0-91cf-6267f45405d6" + }, + "source": [ + "## 📈 Embedding-Based Regression with XGBoost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f058ccac-3392-457d-b54c-6471960e9af3", + "metadata": { + "id": "f058ccac-3392-457d-b54c-6471960e9af3" + }, + "outputs": [], + "source": [ + "# Step 1: Load vectors and prices from Chroma\n", + "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n", + "vectors = np.array(result['embeddings'])\n", + "documents = result['documents']\n", + "prices = [meta['price'] for meta in result['metadatas']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "JYQo0RaMb8Ql", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 254 + }, + "id": "JYQo0RaMb8Ql", + "outputId": "c1641347-1fd4-41bb-e060-147224fc6bed" + }, + "outputs": [], + "source": [ + "# Step 2: Train XGBoost model\n", + "xgb_model = XGBRegressor(n_estimators=100, random_state=42, n_jobs=-1, verbosity=0)\n", + "xgb_model.fit(vectors, prices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "yaqG0z7jb919", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": 
"yaqG0z7jb919", + "outputId": "6a2f9120-97e0-4436-aa12-40d94fbc5c64" + }, + "outputs": [], + "source": [ + "# Step 3: Serialize XGBoost model locally for Hugging Face upload\n", + "MODEL_DIR = os.path.join(ROOT, \"models\")\n", + "MODEL_FILENAME = \"xgboost_model.pkl\"\n", + "LOCAL_MODEL = os.path.join(MODEL_DIR, MODEL_FILENAME)\n", + "\n", + "os.makedirs(MODEL_DIR, exist_ok=True)\n", + "joblib.dump(xgb_model, LOCAL_MODEL)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Z_17sQUdxIr3", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104, + "referenced_widgets": [ + "2362f3121e5546b98e4623eb3680e96b", + "ef53ee3b68c840d6a3fe98386d26bbd9", + "a4768d0ecdd640a2a5bccd07a93c54b7", + "e177440016974bc699b666fa721c6490", + "2a9d0e5829174b738b4dfea1c71a3481", + "ee6dffc7b79e405d923940166ef10590", + "57bf3388622241869a5e9dab558dca72", + "aa87f4feddd6409fbfb81f417e5d6662", + "973a83ca118e4ed1b5a51821034ecc31", + "d5a3c955aba14b3ea8e9b5c90a3bf20a", + "daaa4f26bad545a394685e266f85a6ae" + ] + }, + "id": "Z_17sQUdxIr3", + "outputId": "68ebdbdb-d42e-4bc8-addc-85b42d418d1d" + }, + "outputs": [], + "source": [ + "# Step 4: Push serialized XGBoost model to Hugging Face Hub\n", + "api = HfApi(token=hf_token)\n", + "REPO_NAME = \"smart-deal-finder-models\"\n", + "REPO_ID = f\"{HF_USER}/{REPO_NAME}\"\n", + "\n", + "# Create the model repo if it doesn't exist\n", + "api.create_repo(repo_id=REPO_ID, repo_type=\"model\", private=True, exist_ok=True)\n", + "\n", + "# Upload the saved model\n", + "api.upload_file(\n", + " path_or_fileobj=LOCAL_MODEL,\n", + " path_in_repo=MODEL_FILENAME,\n", + " repo_id=REPO_ID,\n", + " repo_type=\"model\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f59125d-9fa6-483b-957f-4423a9b2c900", + "metadata": { + "id": "3f59125d-9fa6-483b-957f-4423a9b2c900" + }, + "outputs": [], + "source": [ + "# Step 5: Define the predictor\n", + "def xgb_predictor(datapoint):\n", + " doc 
= description(datapoint)\n", + " vector = model_embedding.encode([doc], normalize_embeddings=True)[0]\n", + " return max(0, xgb_model.predict([vector])[0])" + ] + }, + { + "cell_type": "markdown", + "id": "a890f1f0-d827-472f-a7a9-6c2cbe3d8341", + "metadata": { + "id": "a890f1f0-d827-472f-a7a9-6c2cbe3d8341" + }, + "source": [ + "🔔 Reminder: In Part 2, XGBoost with Word2Vec (non-contextual embeddings) achieved:\n", + "- Avg. Error: ~$107\n", + "- RMSLE: 0.83\n", + "- Accuracy: 29.20%\n", + "\n", + "🧪 Now, let’s see if contextual embeddings improve XGBoost." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "q-tIbVilTPxP", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 718 + }, + "id": "q-tIbVilTPxP", + "outputId": "7c9043ef-a2c4-4933-b334-18d99690ba0f" + }, + "outputs": [], + "source": [ + "# Step 6: Run the Tester on a subset of test data\n", + "tester = Tester(xgb_predictor, test)\n", + "tester.run()" + ] + }, + { + "cell_type": "markdown", + "id": "dcb09db0-7d69-40e1-a6e3-b92263e38f1e", + "metadata": { + "id": "dcb09db0-7d69-40e1-a6e3-b92263e38f1e" + }, + "source": [ + "Xgb Predictor Error=$110.68 RMSLE=0.93 Hits=30.4%" + ] + }, + { + "cell_type": "markdown", + "id": "1ccd5d3f-98cd-45a8-951f-d6446062addc", + "metadata": { + "id": "1ccd5d3f-98cd-45a8-951f-d6446062addc" + }, + "source": [ + "Results are nearly the same. In this setup, switching to contextual embeddings didn’t yield performance gains for XGBoost."
+ ] + }, + { + "cell_type": "markdown", + "id": "4db1051d-9a7e-4cec-87fc-0d77fd858ced", + "metadata": { + "id": "4db1051d-9a7e-4cec-87fc-0d77fd858ced" + }, + "source": [ + "## 🚰 Retrieval-Augmented Pipeline – GPT-4o Mini\n", + "\n", + "- Preprocess: clean the input text (description(item))\n", + "- Embed: generate embedding vector (get_embedding(item))\n", + "- Retrieve: find similar items from ChromaDB (find_similar_items)\n", + "- Build Prompt: create the LLM prompt using context and masked target (build_messages)\n", + "- Predict: get price estimate from LLM (estimate_price)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "YPLxSn7eHp9N", + "metadata": { + "id": "YPLxSn7eHp9N" + }, + "outputs": [], + "source": [ + "test[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eFxFKNroNiyD", + "metadata": { + "id": "eFxFKNroNiyD" + }, + "outputs": [], + "source": [ + "# Step 1: Preprocess test item text\n", + "# (uses the same `description(item)` function as during training)\n", + "description(test[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "lxIEtSWYHqCT", + "metadata": { + "id": "lxIEtSWYHqCT" + }, + "outputs": [], + "source": [ + "# Step 2: Embed a test item\n", + "def get_embedding(item):\n", + " return model_embedding.encode([description(item)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "y43prQsuHp_w", + "metadata": { + "id": "y43prQsuHp_w" + }, + "outputs": [], + "source": [ + "# Step 3: Query Chroma for similar items\n", + "def find_similars(item):\n", + " results = collection.query(query_embeddings=get_embedding(item).astype(float).tolist(), n_results=5)\n", + " documents = results['documents'][0][:]\n", + " prices = [m['price'] for m in results['metadatas'][0][:]]\n", + " return documents, prices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "nxAOUFRkHp6v", + "metadata": { + "id": "nxAOUFRkHp6v" + }, + "outputs": [], + 
"source": [ + "documents, prices = find_similars(test[1])\n", + "documents, prices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "djPoSk6sHo84", + "metadata": { + "id": "djPoSk6sHo84" + }, + "outputs": [], + "source": [ + "# Step 4: Format similar items as context\n", + "def format_context(similars, prices):\n", + " message = \"To provide some context, here are some other items that might be similar to the item you need to estimate.\\n\\n\"\n", + " for similar, price in zip(similars, prices):\n", + " message += f\"Potentially related product:\\n{similar}\\nPrice is ${price:.2f}\\n\\n\"\n", + " return message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "F3yxhnqSHp4C", + "metadata": { + "id": "F3yxhnqSHp4C" + }, + "outputs": [], + "source": [ + "print(format_context(documents, prices))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pEJobsKNHqE8", + "metadata": { + "id": "pEJobsKNHqE8" + }, + "outputs": [], + "source": [ + "# Step 5: Mask the price in the test item\n", + "def mask_price_value(text):\n", + " return re.sub(r\"(\\n\\nPrice is \\$).*\", r\"\\1\", text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "vLhBNVBNQAHS", + "metadata": { + "id": "vLhBNVBNQAHS" + }, + "outputs": [], + "source": [ + "# Step 6: Build LLM messages\n", + "def build_messages(datapoint, similars, prices):\n", + "\n", + " system_message = \"You estimate prices of items. 
Reply only with the price, no explanation.\"\n", + "\n", + " context = format_context(similars, prices)\n", + "\n", + " prompt = mask_price_value(datapoint[\"text\"])\n", + " prompt = prompt.replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n", + "\n", + " user_prompt = context + \"And now the question for you:\\n\\n\" + prompt\n", + "\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": \"Price is $\"}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "I94fNHfBHp1a", + "metadata": { + "id": "I94fNHfBHp1a" + }, + "outputs": [], + "source": [ + "build_messages(test[1], documents, prices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5NfY_GAVHpy4", + "metadata": { + "id": "5NfY_GAVHpy4" + }, + "outputs": [], + "source": [ + "# Step 7: Run prediction\n", + "def get_price(s):\n", + " s = s.replace('$','').replace(',','')\n", + " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n", + " return float(match.group()) if match else 0\n", + "\n", + "def gpt_4o_mini_rag(item):\n", + " documents, prices = find_similars(item)\n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=build_messages(item, documents, prices),\n", + " seed=42,\n", + " max_tokens=5\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Pg-GJTT0HpwV", + "metadata": { + "id": "Pg-GJTT0HpwV" + }, + "outputs": [], + "source": [ + "print(test[1][\"price\"])\n", + "print(gpt_4o_mini_rag(test[1]))" + ] + }, + { + "cell_type": "markdown", + "id": "54103ab4-d6dd-4c0b-add5-5d9741e934b4", + "metadata": { + "id": "54103ab4-d6dd-4c0b-add5-5d9741e934b4" + }, + "source": [ + "🔔 Reminder: In Part 2, GPT-4o Mini (without RAG) achieved:\n", + "- Avg. 
Error: ~$99\n", + "- RMSLE: 0.75\n", + "- Accuracy: 44.8%\n", + "\n", + "🧪 Let’s find out if RAG can boost GPT-4o Mini’s price prediction capabilities.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "r0NGJupwHppF", + "metadata": { + "id": "r0NGJupwHppF" + }, + "outputs": [], + "source": [ + "Tester.test(gpt_4o_mini_rag, test)" + ] + }, + { + "cell_type": "markdown", + "id": "00545880-d9e1-4934-8008-b62c105d177b", + "metadata": { + "id": "00545880-d9e1-4934-8008-b62c105d177b" + }, + "source": [ + "Gpt 4O Mini Rag Error=$59.54 RMSLE=0.42 Hits=69.2%" + ] + }, + { + "cell_type": "markdown", + "id": "2b9f46ae-92b5-4189-89b0-df88a600bb89", + "metadata": { + "id": "2b9f46ae-92b5-4189-89b0-df88a600bb89" + }, + "source": [ + "🎉 **GPT-4o Mini + RAG shows clear gains:** \n", + "Average error dropped from **$99 → $59.54**, RMSLE from **0.75 → 0.42**, and accuracy rose from **44.8% → 69.2%**. \n", + "\n", + "Adding retrieval-based context led to a strong performance boost for GPT-4o Mini.\n", + "\n", + "Now the question is — can fine-tuning push it even further, surpass RAG, and challenge larger models?\n", + "\n", + "🔜 See you in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part4_ft_gpt4omini.ipynb)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week6/community-contributions/lisekarimi/09_part4_ft_gpt4omini.ipynb b/week6/community-contributions/lisekarimi/09_part4_ft_gpt4omini.ipynb new file mode 100644 index 0000000..84ca7e6 --- /dev/null
+++ b/week6/community-contributions/lisekarimi/09_part4_ft_gpt4omini.ipynb @@ -0,0 +1,510 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "12934dbc-ff4f-4dfc-8cc1-d92cc8826cf2", + "metadata": {}, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 4)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- ➡️ Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🔧 Part 4: Fine-Tuning GPT-4o Mini\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ✅ CPU is sufficient — no GPU required\n", + "- 🛠️ Requirements: 🔑 HF Token, OpenAI API Key, wandb API Key\n", + "- Tasks:\n", + " - Convert chat data to .jsonl format for OpenAI\n", + " - Fine-tune the model and monitor with Weights & Biases\n", + " - Test the fine-tuned GPT-4o Mini \n", + "\n", + "Can fine-tuning GPT-4o Mini outperform both its zero-shot baseline and RAG-enhanced version?
\n", + "Time to find out.\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5809630f-d3ea-41df-86ec-9cbf59a46f5c", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import importlib\n", + "import json\n", + "import re\n", + "from dotenv import load_dotenv\n", + "from huggingface_hub import login\n", + "from datasets import load_dataset\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4120c84d-c310-4d31-9e1f-1549ea4a4186", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "if not openai_api_key:\n", + " print(\"❌ OPENAI_API_KEY is missing\")\n", + "\n", + "openai = OpenAI(api_key=openai_api_key)\n", + "\n", + "hf_token = os.getenv('HF_TOKEN')\n", + "if not hf_token:\n", + " print(\"❌ HF_TOKEN is missing\")\n", + "\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "id": "31d3aa97-68a8-4f71-a43f-107f7c8553c5", + "metadata": {}, + "source": [ + "## 📥 Load Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2bae96a", + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c45e23d6-1304-4859-81f0-35a9ddf1c755", + "metadata": {}, + "outputs": [], + "source": [ + "HF_USER = \"lisekarimi\"\n", + "DATASET_NAME = f\"{HF_USER}/pricer-data\"\n", + "\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "667adda8-add8-41b6-9e60-7870bad20c02", + 
"metadata": {}, + "outputs": [], + "source": [ + "test[0]" + ] + }, + { + "cell_type": "markdown", + "id": "b85d86d0-b6b1-49cd-9ef0-9214c1267199", + "metadata": {}, + "source": [ + "## 🛠️ Step 1 : Data Preparation" + ] + }, + { + "cell_type": "markdown", + "id": "d3ba760d-467a-4cd9-8d3f-e6ce84273610", + "metadata": {}, + "source": [ + "To fine-tune GPT-4o-mini, OpenAI requires training data in **.jsonl format**. \n", + "\n", + "`make_jsonl` converts our chat data :\n", + "\n", + "from \n", + "\n", + "[\n", + " {\"role\": \"system\", \"content\": \"You estimate prices of items. Reply only with the price, no explanation\"},\n", + " {\"role\": \"user\", \"content\": \"How much is this laptop worth?\"},\n", + " {\"role\": \"assistant\", \"content\": \"Price is $999.00\"}\n", + "]\n", + "\n", + "into the .jsonl format \n", + "\n", + "{\"messages\": [{\"role\": \"system\", \"content\": \"You estimate prices of items. Reply only with the price, no explanation\"}, {\"role\": \"user\", \"content\": \"How much is this laptop worth?\"}, {\"role\": \"assistant\", \"content\": \"Price is $999.00\"}]}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec254755-67f6-4676-b67f-c1376ea00124", + "metadata": {}, + "outputs": [], + "source": [ + "# Mask the price in the test item\n", + "def mask_price_value(text):\n", + " return re.sub(r\"(\\n\\nPrice is \\$).*\", r\"\\1\", text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5e51957-b0ec-49f9-ae70-74771a101756", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(datapoint):\n", + " system_message = \"You estimate prices of items. 
Reply only with the price, no explanation\"\n", + " user_prompt = mask_price_value(datapoint[\"text\"]).replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\",\"\")\n", + " assistant_response = f\"Price is ${datapoint['price']:.2f}\"\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": assistant_response}\n", + " ]\n", + "\n", + "messages_for(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03583d32-b0f2-44c0-820e-62c8e7e48247", + "metadata": {}, + "outputs": [], + "source": [ + "def make_jsonl(datapoints):\n", + " result = \"\"\n", + " for datapoint in datapoints:\n", + " messages = messages_for(datapoint)\n", + " messages_str = json.dumps(messages, ensure_ascii=False)\n", + " result += '{\"messages\": ' + messages_str + '}\\n'\n", + " return result.strip()\n", + "\n", + "make_jsonl(train.select([0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36c9cf60-0bcb-44cb-8df6-ff2ed4110cd2", + "metadata": {}, + "outputs": [], + "source": [ + "ft_train = train.select(range(100))\n", + "ft_validation = train.select(range(100, 150))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "494eaecd-ae5d-4396-b694-6faf88fb7fd6", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the items into jsonl and write them to a file\n", + "\n", + "def write_jsonl(datapoints, filename):\n", + " with open(filename, \"w\", encoding=\"utf-8\") as f:\n", + " jsonl = make_jsonl(datapoints)\n", + " f.write(jsonl)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae42986d-ab02-4a11-aa0c-ede9c63ec7a2", + "metadata": {}, + "outputs": [], + "source": [ + "write_jsonl(ft_train, \"data/ft_train.jsonl\")\n", + "write_jsonl(ft_validation, \"data/ft_val.jsonl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b9bed22d-73ad-4820-a983-cbdccd8dbbc8", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"data/ft_train.jsonl\", \"rb\") as f:\n", + " train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n", + "with open(\"data/ft_val.jsonl\", \"rb\") as f:\n", + " validation_file = openai.files.create(file=f, purpose=\"fine-tune\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e6c6ce8-6600-4068-9ec5-32c6428ce9ea", + "metadata": {}, + "outputs": [], + "source": [ + "train_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26943fad-4301-4bb4-97e8-be52a9743322", + "metadata": {}, + "outputs": [], + "source": [ + "validation_file" + ] + }, + { + "cell_type": "markdown", + "id": "edb0a3ec-1607-4c5b-ab06-852f951cae8b", + "metadata": {}, + "source": [ + "## 🚀 Step 2: Run Fine-Tuning & Monitor with wandb\n", + "We will use https://wandb.ai to monitor the training runs\n", + "\n", + "1- Create an API key in wandb\n", + "\n", + "2- Add this key in OpenAI dashboard https://platform.openai.com/account/organization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59f552fe-5e80-4742-94a8-5492556a6543", + "metadata": {}, + "outputs": [], + "source": [ + "wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer\"}}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "144088d7-7c30-439a-9282-1e6096c181ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Run the fine tuning\n", + "\n", + "openai.fine_tuning.jobs.create(\n", + " training_file=train_file.id,\n", + " validation_file=validation_file.id,\n", + " model=\"gpt-4o-mini-2024-07-18\",\n", + " seed=42,\n", + " hyperparameters={\"n_epochs\": 1},\n", + " integrations = [wandb_integration],\n", + " suffix=\"pricer\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "330e75f5-0208-4c74-8dd3-07bc06047b2e", + "metadata": {}, + "outputs": [], + "source": [ + "job_id = 
openai.fine_tuning.jobs.list(limit=1).data[0].id\n", + "job_id\n", + "\n", + "# Then check your wandb dashboard to view the run of this job ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a92dac5-e6d8-439c-b55e-507becb37a6c", + "metadata": {}, + "outputs": [], + "source": [ + "# Use this command to track the fine-tuning progress here\n", + "\n", + "openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=2).data" + ] + }, + { + "cell_type": "markdown", + "id": "b6b65677-06b2-47d3-b0e6-51210a3d832b", + "metadata": {}, + "source": [ + "# 📧 You’ll get an email once fine-tuning is complete. ☕ You can take a break until then. ▶️ Once you receive it, run the cells below to continue." + ] + }, + { + "cell_type": "markdown", + "id": "0a7af4be-0b55-4654-af7a-f47485babc52", + "metadata": {}, + "source": [ + "## Step 3 : Test the fine tuned model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8497eb8-49ee-4a05-9e51-fc1b4b2b41d4", + "metadata": {}, + "outputs": [], + "source": [ + "ft_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model\n", + "ft_model_name" + ] + }, + { + "cell_type": "markdown", + "id": "12bed33f-be31-4d7c-8651-3f267c529304", + "metadata": {}, + "source": [ + "You can find the entire fine-tuning process in the **Fine-tuning** dashboard on OpenAI.\n", + "\n", + "![Fine-tuning Process](https://github.com/lisekarimi/lexo/blob/main/assets/09_ft_gpt4omini.png?raw=true)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6a89ef-f982-457a-bad7-bd84b6132a07", + "metadata": {}, + "outputs": [], + "source": [ + "# Build LLM messages\n", + "def build_messages(datapoint):\n", + " system_message = \"You estimate prices of items. 
Reply only with the price, no explanation\"\n", + " user_prompt = mask_price_value(datapoint[\"text\"]).replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\",\"\")\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": \"Price is $\"}\n", + " ]\n", + "\n", + "def get_price(s):\n", + " s = s.replace('$','').replace(',','')\n", + " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n", + " return float(match.group()) if match else 0\n", + "\n", + "def gpt_ft(datapoint):\n", + " response = openai.chat.completions.create(\n", + " model=ft_model_name,\n", + " messages=build_messages(datapoint),\n", + " seed=42,\n", + " max_tokens=7\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a93017-458c-4769-b81c-b2dad2af7552", + "metadata": {}, + "outputs": [], + "source": [ + "print(test[0][\"price\"])\n", + "print(gpt_ft(test[0]))" + ] + }, + { + "cell_type": "markdown", + "id": "87a5ad10-ed60-4533-ad61-225ceb847e6c", + "metadata": {}, + "source": [ + "🔔 **Reminder:** \n", + "- In **Part 2**, GPT-4o Mini (zero-shot) scored: \n", + " Avg. Error: ~$99 | RMSLE: 0.75 | Accuracy: 44.8% \n", + "\n", + "- In **Part 3**, with **RAG**, performance improved to: \n", + " Avg. Error: ~$59.54 | RMSLE: 0.42 | Accuracy: 69.2%\n", + "\n", + "🧪 **Now it’s time to see** if fine-tuning can push GPT-4o Mini even further and outperform both baselines." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0adf1500-9cc7-491a-9ea6-88932af85dca", + "metadata": {}, + "outputs": [], + "source": [ + "import helpers.testing\n", + "importlib.reload(helpers.testing)\n", + "\n", + "from helpers.testing import Tester # noqa: E402\n", + "\n", + "tester = Tester(gpt_ft, test)\n", + "tester.run()" + ] + }, + { + "cell_type": "markdown", + "id": "37439666", + "metadata": {}, + "source": [ + "Gpt Ft Error=$129.16 RMSLE=0.94 Hits=35.2%" + ] + }, + { + "cell_type": "markdown", + "id": "5487da30-e1a8-4db5-bf17-80bc4f109524", + "metadata": {}, + "source": [ + "**Fine-tuning GPT-4o Mini led to worse performance than both its zero-shot and RAG-enhanced versions.**\n", + "\n", + "⚠️ When Fine-Tuning Isn’t Needed:\n", + "- For tasks like price prediction, GPT-4o performs well with prompting alone — thanks to strong pretraining and generalization.\n", + "- 💡 Fine-tuning isn’t always better. Use it when prompting fails — not by default.\n", + "\n", + "✅ **When Fine-Tuning Is Worth It (based on OpenAI’s own guidelines)**\n", + "- Custom tone/style – e.g., mimicking a brand voice or writing like a specific author\n", + "- More consistent output – e.g., always following a strict format\n", + "- Fix prompt failures – e.g., when multi-step instructions get ignored\n", + "- Handle edge cases – e.g., rare product types or weird inputs\n", + "- Teach new tasks – e.g., estimating prices in a custom format no model has seen before\n", + "\n", + "---\n", + "\n", + "Now that we’ve explored both frontier closed-source models and traditional ML, it’s time to turn to open-source.\n", + "\n", + "🚀 **Next up: Fine-tuned LLaMA 3.1 8B (quantized)** — can it beat its base version, outperform GPT-4o Mini, or even challenge the big players?\n", + "\n", + "🔍 Let’s find out in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part5_llama31_8b_quant.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week6/community-contributions/lisekarimi/data/human_output.csv b/week6/community-contributions/lisekarimi/data/human_output.csv new file mode 100644 index 0000000..e516273 --- /dev/null +++ b/week6/community-contributions/lisekarimi/data/human_output.csv @@ -0,0 +1,1500 @@ +"How much does this cost to the nearest dollar? + +OEM AC Compressor w/A/C Repair Kit For Ford F150 F-150 V8 & Lincoln Mark LT 2007 2008 - BuyAutoParts NEW +As one of the world's largest automotive parts suppliers, our parts are trusted every day by mechanics and vehicle owners worldwide. This A/C Compressor and Components Kit is manufactured and tested to the strictest OE standards for unparalleled performance. Built for trouble-free ownership and 100% visually inspected and quality tested, this A/C Compressor and Components Kit is backed by our 100% satisfaction guarantee. Guaranteed Exact Fit for easy installation 100% BRAND NEW, premium ISO/TS 16949 quality - tested to meet or exceed OEM specifications Engineered for superior durability, backed by industry-leading unlimited-mileage warranty Included in this K + +Price is $",120 +"How much does this cost to the nearest dollar? 
+ +Motorcraft YB3125 Fan Clutch +Motorcraft YB3125 Fan Clutch Package Dimensions 25.146 cms (L) x 20.066 cms (W) x 15.494 cms (H) Package Quantity 1 Product Type Auto Part Country Of Origin China Manufacturer Motorcraft, Brand Motorcraft, Model Fan Clutch, Weight 5 pounds, Dimensions 10 x 7.63 x 6.25 inches, Country of Origin China, model number Exterior Painted, Manufacturer Part Rank Automotive Automotive Replacement Engine Fan Clutches 583, Domestic Shipping can be shipped within U.S., International Shipping This item can be shipped to select countries outside of the U.S. Learn More, Available October 10, 2007 + +Price is $",80 +"How much does this cost to the nearest dollar? + +Dorman Front Washer Fluid Reservoir Compatible with Select Ford/Lincoln/Mercury Models +This washer fluid reservoir is designed to match the fit and function of the original equipment reservoir. It is engineered to withstand the stresses of underhood heat and engine vibration on specified vehicle makes, models, and years. This part is compatible with the following vehicles. Before purchasing, enter your vehicle trim in the garage tool to confirm fitment. Ford Explorer 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 - Lincoln Aviator 2003, 2004, 2005 - Mercury Mountaineer 2002, 2003, 2004, 2005, + +Price is $",35 +"How much does this cost to the nearest dollar? + +HP Premium HD Plus Touchscreen 1TB HDD 2.3GHz AMD Ryzen 5, (12GB RAM, Ryzen 5 4500U, DVD Writer, Windows 10 Home) Natural Silver, (Renewed) +This pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. 
If not satisfied with the purchase, renewed products are eligible for replacement or refund under the Amazon Renewed Guarantee. + +Price is $",350 +"How much does this cost to the nearest dollar? + +Super Switch Pickup Selector Super Switch 4-Pole Double Wafer for Strat/Nashville Tele Guitars with Black/Ivory/White Tips +Dopro Super Switch Pickup Selector Super Switch 4-Pole Double Wafer for Strat/Nashville Tele Guitars with Black/Ivory/White Tips Package includes 3 free tips which normally sold separately Five-position blade pickup selector switch ideal for four-conductor pickups. Used on American made Fat Strat and Double Fat Strat models, and on Nashville Telecaster models. Mounting screws included. Mounting screws and 35mm cavity depth required 1-5/8 standard mounting screw spacing Please consult your local Luthier if you don't know how to install the switch Dimensions 5.47 x 4.29 x 0 + +Price is $",75 +"How much does this cost to the nearest dollar? + +Horror Bookmarks, Resin Horror Bookmarks for Adults, The Best Gift for Fans of Horror Novels, Horror Personalized Bookmarks for Men Women, Horror Figures Sculpture Bookmarks (Set) +Horror Bookmarks - The Best Gift for Fans of Horror Novels, Resin Horror Bookmarks for Adults, Half-Length Figure Sculpture Bookmarks, Horror Bookmark Set for Men Women Office Supplies Specification Upper Part Material Resin Lower Part Of Material Wood Character Size 3.3cm x 2cm / x Overall Size 10cm / 4inch Package Includes 1 / 6 x Horror bookmarks - the best gift for fans of horror novels Note 1. Due to manual measurements, please allow slight measurement deviations. 2. Due to the different display and lighting effects, the + +Price is $",12 +"How much does this cost to the nearest dollar? + +SK6241 - Stinger 4 Gauge 6000 Series Power Amplifier Installation Kit +Amplifier installation kits can save you time & money when installing an amplifier in your vehicle. 
Instead of purchasing everything you need separately like power/ground cables, remote turn-on cable, fuse blocks, fuses, and more, you can get an amp kit that already has the cables & accessories in one package. Amplifier wiring kits come in various gauge configurations depending on the wattage of your sound system and include all the necessary components you need to successfully install an amplifier. This Stinger 4 AWG 6000 Series Power Amplifier Wiring Kit includes all the necessary hardware and wire to power one amplifier and is designed specifically for car audio systems up to 1750 watts. The included premium power + +Price is $",115 +"How much does this cost to the nearest dollar? + +Godox ML60Bi LED Light Kit, Handheld LED Video Light, Bi-Color LED Light, CRI 96+ TLCI 97+, 7 FX Effects, Slient Mode, with Softbox, RC-A6 Remote Control, 2X NP970 Lithium Battery +🌸Breathtakingly powerful and portable 10100 Lux @ 1 m with the included reflector. Weighing just 0.77 kg (lightweight housing only), the ML60Bi is extremely easy to hold and carry. In relation to its small size, it produces an impressively high output and thus offers you more flexible creativity for your productions. 🌸Variable color temperature The wide color temperature range from 2800K to 6500K allows you to quickly + +Price is $",100 +"How much does this cost to the nearest dollar? + +Randall G3 Plus Combo Guitar Amp +The Randall G3 is a combo amplifier that brings even more intensity and aggressiveness to your guitar playing with a redesigned preamp circuit, a 12 speaker, and digital effects. Randall took the high gain tone circuit of their flagship V2 and T2 guitar amplifiers and installed it in each of the G3 Plus amps. What you get is higher professional level tone and performance without the costs of a high-end amp. 
EQ Controls - Bass, Middle, Treble, Sweep, Voicing Master Control - Volume Power Output - 100 Watts @ 4 Ohms Single Speaker - 12 Celestion Seventy 80 2 Mode - Weight 51 Pounds, Dimensions 13.5 x 27 x 20.5 inches, + +Price is $",300 +"How much does this cost to the nearest dollar? + +HOLDWILL 6 Pack LED Shop Light, 4FT 24W 6500K, Cool White, Clear Cover, Hight Output, Linkable 4 Foot LED Strip Lights, T8 LED Tube Lights, LED Light Fixture for Garage Ceiling with Plug Cable +Specification Length Power 24W Lumen 2640lm CCT 6500K Beam Angle 120 degrees Luminous Flux 110lm per watt LED chips 120pcs Color Rendering Index(CRI) 85 Operation Temperature °F Lamp Power Factor 0.95 Input Voltage Material Top quality Aluminum & superior PC BRIGHT - HOLDWILL shop light bring your application the best lighting available with an incredible minimum of Illuminate the dark corners of your space SAVING - Each led under cabinet light has 24w + +Price is $",80 +"How much does this cost to the nearest dollar? + +Viking Horns 3 Gallon Air Tank and 200 PSI Air Compressor Kit, for Train Air Horn +New 3 gallon (12 liter) Air Tank & 200 PSI H.D Air Compressor Kit. For high pressure air horns systems that require an on-board air system. Air tank made from heavy gauge steel for outdoor use. Compact design and weight. Comes as a kit, ready to install, with 240 psi Air Gauge, 200 psi Air Pressure Switch, Compression Fittings for 1/4 O.D air hose, air pressure release safety valve. Comes with a 200 PSI Heavy Duty Air Compressor with a 1/4 inch NPT braided air hose, and Mounting Hardware. Both, Air Tank & Compressor Kit come complete ready for installation + +Price is $",90 +"How much does this cost to the nearest dollar? + +CURT 70110 Custom Tow Bar Base Plate Brackets for Dinghy Towing, Fits Select Jeep Wrangler JK +CUSTOM FIT. This tow bar base plate is uniquely engineered to fit select years of the Jeep Wrangler JK. 
It bolts onto the frame, providing a reliable connection for flat towing (not compatible with universal tow bar) HIGHLY VERSATILE. Add versatile towing options to your vehicle with this class 1 trailer hitch. It provides a standard, square receiver hitch and comes with a ball mount with a ball hole to connect a small trailer SOLID STRENGTH. CURT towing base plates and tow bar brackets are constructed from high-strength steel and welded together with precision for dependable towing strength DOUBLE FINISH. This tow plate is finished in a tough shield of dual-co + +Price is $",120 +"How much does this cost to the nearest dollar? + +Solar HAMMERED BRONZE Finish Post Deck Fence Cap Lights for 4 X 4 WOOD Post With White LEDs and Vertical-lined Clear Lens -GREEN NATURAL SOLAR +During the day, these energy-efficient Atlantic Solars lights harness sunlight energy to charge their internal Lithium Battery. At night they automatically turn on, emitting a bright glow for your fence post line. Each Atlantic Solars light is made of Premium Heat-Resistant Plastic for years of use. With our Newest & Improved Solar Panels, a 3.2V Lithium Ion Battery, and 5 Ultra Bright White LEDs, its output is virtually unparalleled. Atlantic Solars 4 x 4 Fence Post Caps One Pre-Installed 3.2V Lithium-Ion Battery for each light Light source + +Price is $",300 +"How much does this cost to the nearest dollar? + +COSTWAY Electric Tumble Dryer, Sliver +This is our brand new compact dryer with 10 lbs. cloth capacity, it will be your best helper to dry your cloth or sheet in a short time, It has four mode air dry, cool, warm, and hot. You can choose the drying time or mode according to the material and weight of the cloth. This dryer combine the cooling, wrinkle, freshening, function, without taking up a lot of room will bring much convenience for your life. Don't hesitate to buy one! feature brand new and high quality 1. 5 cu. Ft. Capacity allows you to dry up to 10 lbs. 
Of clothing stainless steel tub provides durability see- through window lets you monitor clothes as they dry four mode air dry + +Price is $",450 +"How much does this cost to the nearest dollar? + +FREE SIGNAL TV Transit 32 12 Volt DC Powered LED Flat Screen HDTV for RV Camper and Mobile Use +Mobile High Performance 32 inch LED TV - Get HD picture quality from this superb DC-powered 12 volt television with 1366 x 768 resolution. Groundbreaking engineering results in a lightweight TV with dynamic audio response and advanced noise reduction circuitry. The Easy-to-Set-Up and Versatile RV Flat Screen TV - Simple 12V connection. 3 HDMI Inputs. Can also be used at home by converting to AC with the included 1260 Power Brick Adapter. Perfect Television for Campers, Trailers, RVs, and More - The Transit 32 inch 12 volt powered flat screen TV is also ideal for cars and trucks. With high-resolution picture from a + +Price is $",280 +"How much does this cost to the nearest dollar? + +Bilstein 5100 Monotube Gas Shock Set compatible with Jeep Cherokee XJ w/2-3 Lift +Bilstein 5100 shocks utilize a monotube design, with a 46mm digressive piston, to increase road handling characteristics both on and off-road. The single tube body allows for rapid heat transfer between the shock oil to the shock body, and then dissipated further to reduce shock fade. German engineering combined with US manufacturing ensures optimum performance and longevity for upwards of 100k miles (real customer feedback!). The 5100 series is more compliant in tougher situations and off-road terrain compared to the 4600 series. Factory spec vehicles should use the 4600 series shocks as the valving is more appropriate for daily driven vehicles and pavement environments. Part Numbers + +Price is $",140 +"How much does this cost to the nearest dollar? 
+ +Sangean K-200 Multi-Function Upright AM/FM Digital Radio (Pink) +Product Description The K-200 from SANGEAN brings sleekness and uniqueness into a multi-media entertainment unit including the features from traditional alarm clock. The versatile and unique omni-directional speaker design fills your work place with high-performance sound quality that plays your music with crystal clear digital sound and deep bass for more powerful overall sound. The eye-catching night light with 8 brightness settings definitely gives a little brightness to your counter. From the Manufacturer Sangean's new sleek and unique K-200 AM / FM-RBDS Digital Tuning Kitchen Radio brings a multi-media entertainment center to your kitchen combines the features of a traditional alarm clock. The versatile and unique omni-directional speaker design fills your + +Price is $",19.99 +"How much does this cost to the nearest dollar? + +Charles Leonard Magnetic Lapboard Class Combo Pack, Includes 12 Each Plain/Plain 9 x 12 Inch White Boards, 2-Inch Erasers, Black Dry Erase Markers +Class pack provides an environmentally friendly way for children to learn. Dry-erase surface lets students write, wipe off and reuse with no waste. Classroom set includes magnetic lap boards that are dual-sided; dry-erase markers; and multipurpose erasers. ALL INCLUSIVE set has everything you need for the classroom! 12 Each Double Sided Magnetic Plain/Plain 9x12 lap boards. Individual dry erase boards are large enough to work on for math problems, English language arts, Penmanship or for drawing. Just write and wipe, it’s that easy SAFETY FIRST each set also includes 30 + +Price is $",85 +"How much does this cost to the nearest dollar? 
+ +Gigabyte AMD Radeon HD 7870 2 GB GDDR5 Mini-Displayport PCI-Express 3.0 Graphic Card +Powered by AMD Radeon HD 7870 GPU and Integrated with the industry's best 2 GB GDDR5 memory and memory interface Ultra Durable VGA Components - GPU Temperature 5%-10% Down - Overclocing Capability Up - Power Switching Loss Down WINDFORCE 3X Anti-Turbulence Cooling with New Triangle Cool Technology Gold Plated HDMI for optimum signal transfer between connections Features mini-Display port outputs with HDCP protection Supports AMD Eyefinity/Eyespeed/CrossFire/Avivo HD Technologies Minimum Recommended Power Supply 500W or greater with 2x 6-pin VGA power connectors Max Screen Resolution 4096 x + +Price is $",450 +"How much does this cost to the nearest dollar? + +3dRose LLC 8 x 8 x 0.25 Inches Bull Terrier on Zebra Pattern Mouse Pad +Bull Terrier On Zebra Mouse Pad is 8 x 8 x.25 and is made of heavy-duty recycled rubber. Matte finish image will not fade or peel. Machine washable using a mild detergent and air dry. Dimensions (in inches) 8 W x 8 H x 0.25 D Matte finish Soft to touch, will not crack or peel Clean with mild detergent Made of heavy-duty recycled rubber Manufacturer 3D Rose (Home Improvement), Brand Weight 4.9 ounces, Dimensions 8 x 0.25 x 8 inches, model number Shape Square, Material Type Rubber, s 1, Size 8\ x + +Price is $",7 +"How much does this cost to the nearest dollar? 
+ +ROKINON 85mm F1.4 Auto Focus Full Frame Weather Sealed High Speed Telephoto Lens for Nikon F Mount +Dslr camera lens For Nikon F Mount full-frame & APS-C DSLR cameras Aperture range f/1.4 to f/16 Ultra multi-coated Optics; weather-sealed Takes front filter size of 77mm Constructed of 9 elements in 7 groups Dimensions 2.9 x 3.2 x 3.2 inches, Weight 1.06 pounds, model number Rank SLR Camera Lenses 1944, Available April 18, 2019, Manufacturer Rokinon, Country of Origin Korea, Republic of, Brand Rokinon, Focal Length Description 85mm, Lens Type Telephoto + +Price is $",320 +"How much does this cost to the nearest dollar? + +Headlight Assembly Compatible with 2012 2013 2014 2015 Civic Sedan 4-Door 12 13 Civic Coupe 2-Door Black Housing Amber Reflector +Vehicle compatibility headlights assembly compatible with Civic Sedan 4-Door / Civic Coupe high beam mode 9005 and low beam mode 9006; bulbs are not included Waterproof fully sealed with solid silicon & designed with one-way vents to prevent moisture from being trapped inside the housing, no corrosion or moisture worries in sorts of weather conditions Brighter lighting the metallic parabolic reflectors provide more light output to create a broader and smoother beam Safety reflector the sided micro-prism reflector makes the oncoming cars or passerby quickly notice you at night, ensuring your driving safety and others' Impact resistance the + +Price is $",110 +"How much does this cost to the nearest dollar? + +ASI NAUTICAL 2.5 Inches Opera Glasses Binoculars for Adults with Handle- Captain's Mother of Pearl Solid Brass Opera Glasses Binoculars-Pocket Size Handel Binoculars for Kids, Bird Watching, Hunting +Thanks for Visiting Best Antique & Nautical Items store at amazon ASI NAUTICAL This 2.5 Captains Solid Brass Black & White Binocular By ASI NAUTICAL made with Antique Finish is best suitable to gift your loved ones. 
Easy to carry to any outdoor place like when you go to watch football match,Any historical Monuments,Visit Mountains,hills, for Hunting,Birds Watching,etc. Material Brass Magnification 10x Approx. Total Weight 0.19 kg Approx. Binocular Size 2.5 Inches, + +Price is $",65 +"How much does this cost to the nearest dollar? + +Behringer TUBE OVERDRIVE TO100 Authentic Tube-Sound Overdrive Effects Pedal +BEHRINGER TUBE OVERDRIVE TO100 Authentic Tube-Sound Overdrive Effects Pedal Get tube-like distortion, smooth sustain and super fat tone Get tube-like distortion, smooth sustain and super fat tone This BEHRINGER product has been designed to compete head to head with leading products on the market This BEHRINGER product has been designed to compete head to head with leading products on the market Captures every nuance of your playing from smooth overdrive to screaming tube sounds Captures every nuance of your playing from smooth overdrive to screaming tube sounds Dedicated Drive, Tone and Level controls for awesome sound shaping Dedicated Drive, Tone and Level controls for awesome sound shaping Status LED for effect on/off and + +Price is $",185 +"How much does this cost to the nearest dollar? + +Fun Express Insect Finger Puppets - 24 finger puppet bugs for kids +You'll get an assortment of 24 insect finger puppets including bees, butterflies, ladybugs, dragonflies, and grasshoppers. Each plastic bug finger puppet is made of quality vinyl and measures 1 3/4 - 2 3/4. Plastic bugs and insects for kids make a fun and creative gift and can be used as party favors or decorations for your bug themed party! Adult supervision recommended for children under 3 years as small parts could be a choking hazard. 24 Insect Finger puppets for toddlers and children. Each finger puppet is made of vinyl and measures 1 3/4 - 2 3/4. You'll get an assortment of toy bugs and + +Price is $",6 +"How much does this cost to the nearest dollar? 
+ +WAFJAMF Roller Stamp Identity Theft Stamp Perfect for Privacy Protection(Blue) +IDENTITY THEFT PROTECTION SOLUTION Paper can be recycled after using roller stamp, no need for a shredder. WIDE COVERAGE DESIGN The 1.26 inches wide roller is perfect for covering large swaths of private information in a quick and clean way. SAVE TIME Ink quickly dries. Stamp works well on all regular paper, envelopes and package addresses. One swipe and the info is covered, no need to go for a shredder. UNIQUE DESIGN FOR PRIVACY PROTECTION Compact design with CONFIDENTIAL letters, specially designed to obscure the text underneath it. Total length of stamp coverage can reach 50 meters. Dimensions 2.72 x 1.89 x 1.38 inches, Weight + +Price is $",33 +"How much does this cost to the nearest dollar? + +Capulina Tiffany Floor Lamp 16 Wide Stained Glass Dragonfly Antique Style Standing Reading Light for Living Room Bedroom +Size and Weight lamp shade 16 inches wide and lamp post 63 inches Height;product total weight is 18lbs And the base heavy is 6.6lbs to ensure stability Real tiffany lamp shade lamp shade is handmade by skilled craftsmen,Each small piece of stained glass of the lampshade is spliced by copper foil method,never fade color,durable and beautiful Bulb Matching We can use Incandescent or CFL bulbs (bulbs are not included),different bulbs get different looking,recommanding to use Edison LED bulb. Decor living room bedroom When you light bulb up,tiffany lamp shade eallows the light to filter in but also softens the heat and + +Price is $",65 +"How much does this cost to the nearest dollar? + +Apple Watch Series 6 (GPS, 44mm) - Space Gray Aluminum Case with Black Sport Band (Renewed Premium) +Apple Watch Series 6 (GPS, 44mm) - Space Gray Aluminum Case with Black Sport Band LEAVE YOUR PHONE IN YOUR POCKET Apple Watch Series 6 GPS Model lets you call, text, and get directions from your wrist, while leaving your phone in your pocket. 
It offers multiple connectivity options, including Bluetooth, Wi-Fi, and NFC to suit all of your possible needs. ALWAYS-ON RETINA DISPLAY You no longer need to raise your wrist or touch the screen to see the time or other information on your watch face, because the display never sleeps. All you need to do is glance to find the time or your workout metrics right there where you + +Price is $",199 +"How much does this cost to the nearest dollar? + +ICON 01725 Tandem Axle Fender Skirt FS1724 for KZ - Cobalt Blue +Tandem axle fender skirt measures to 65-1/4 x 14. Constructed of durable high-impact ABS plastic. This replacement fender skirt is textured. The legs of these fender skirts curve underneath the trailer. Durable, high-impact ABS plastic Textured finish Quick and simple installation Color Cobalt Blue Size 65-1/4 x 14 Brand ICON, Color Regular, Exterior Finish Smooth, Material Acrylonitrile Butadiene Styrene, Dimensions LxWxH 14.5 x 5 x 2.9 inches, Style Modern, Auto Part Position Lower, Vehicle Service Type Trailer, Fit Type Universal + +Price is $",310 +"How much does this cost to the nearest dollar? + +SanDisk 128GB Ultra (10 Pack) MicroSD Class 10 Micro SDXC Memory Card for Smartphone Bundle with (1) GoRAM Reader 10 Pack) +Shoot and save more high-quality photos and full HD video on your Android smartphone or tablet with SanDisk Ultra microSD UHS-I cards. With storage capacities up to 128GB, they're the ideal complement for Android smartphones and tablets. And the SanDisk memory zone app, available on the Google play store, makes it easy to view, access, and back up all of your files from your phones memory in one convenient place. To help your smartphone run at its peak performance, This app can be set to automatically off-load files from your smartphones internal memory to your memory card. Bundle Includes (10x) 128GB + +Price is $",180 +"How much does this cost to the nearest dollar? 
+ +Velvac - 715427 +2020 Mirror System, 2003 & Newer Ford E-Series Cutaway Standard Head, Black, Htd Remote Flat Glass, Wedge Convex, 102 Body Width, Left Side 2020 System, Ford E, 102 Body, Black, Left Side Htd Remote Flat Glass, Wedge Convex, Standard Head Model 2020 mirrors are designed specifically for wide body applications such as high cube cut away vans, rental trucks, Class C RV's and ambulances. The fixed length arms are designed to position the mirror beyond the body providing the driver with an unobstructed view of blind spots and passing lanes around the vehicle. These versatile mirrors are available in several body widths and finishes as well as manual or heated remote glass + +Price is $",110 +"How much does this cost to the nearest dollar? + +TCMT Passenger Backrest Sissy Bar & Luggage Rack Fits For Indian Scout Scout Sixty Scout ABS 2020 Scout 100th Anniversary Scout Sixty ABS +An Indian Scout passenger will ride with greater comfort and confidence with this Quick Release Passenger Sissy Bar. This sturdy backrest can be installed quickly and easily without tools once a set of Mounting Spools are installed on the Motorcycle. The Passenger Backrest locks securely onto the Mounting Spools and provides the passenger with strong, stable support and comfort. The backrest must be equipped with a Genuine Leather Backrest Pad. To add convenient cargo space, an accessory Chrome Backrest Luggage Rack can be added to the backrest. Fitment Fit For 2020 Scout 100th Anniversary Fit For Scout Sixty ABS Fit For + +Price is $",85 +"How much does this cost to the nearest dollar? 
+ +Alnicov 63.5MM Brass Tremolo Block,Tremolo System Bridge,With Bar Block For Fender Strat Stratocaster Bridge +Description 1.Fits bridges with 2-1/16 E to E string pacing and 6 screw modifications install (detailed instructions included) Fits for MIM Fender Standard Series StratAmerican Special StratMIM Classic PlayerClassic Vibe StratAny Import Strat with 6 screw pivot mounting and 2 1/16 string spacing Specifications Tremlo block block dia size brassWeight 248g Package included 1Pcs tremolo bar wrench Durable Electric Guitar Bridge Tremolo Block High quality, easy to handle Sustain your guitar bridge Add mass and sustain to your bridge along with the tonal qualities of brass Since this block is larger + +Price is $",65 +"How much does this cost to the nearest dollar? + +Subaru Forester Outback Legacy OEM Engine Block Heater Genuine new +Manufactured from top quality components, this is your inexpensive replacement option for your rebuild, repair, and maintenance needs. When you select a genuine OEM part - you can rely on the high quality and effectiveness of the product and brand without having to guess if the product will work in sequence with your vehicle. Protecting your investment is important and choosing the right parts can be challenging. Stick with what you know and choose a genuine OEM part. Genuine Subaru Genuine Engine Block Heater Warms engine coolant to promote easier starting in extreme cold conditions. Plugs into a a household electrical outlet. Crosstrek Hybrid models Forester Outback 2.5 Legacy 2.5 Manufacturer Subaru, Brand Subaru, Weight 1 pounds, Dimensions + +Price is $",350 +"How much does this cost to the nearest dollar? + +Richmond Auto Upholstery - 2012 Dodge Ram 3500 Laramie Crew-Cab - Driver Side Bottom Replacement Perforated Leather Seat Cover Dark Gray +Our OEM replacement leather seat covers are guaranteed to match your vehicles interior! 
Richmond Auto Upholstery has been manufacturing automotive seat covers for over 30 years and only specializing in original factory replacement leather covers & much more! If you cannot find what you need for your Dodge Ram then please give us a call at (281) with your vehicles information!To ensure you receive the correct cover, please send us your VIN Number & TRIM (Interior Trim) Code during check out or email it to us after you have made your purchase. If we do not receive this information within 24 hours we will send an email requesting the information + +Price is $",260 +"How much does this cost to the nearest dollar? + +AP-39 Automotive Paint Primer Grey 2K Urethane Gallon Kit Normal Activator +Automotive paint primer sealer applied as a high build sanding primer or final non-sanding primer sealer. Compatible with AF 970 Black Automotive Base coat certified to be among the Deepest Black Base coats in the market. Not for sale in California, Delaware, and Maryland. Direct to Metal Excellent Filling Properties; Superior Color Holdout Easy Spray and Sanding Shipped by UPS ground only. No overnight shipping. The material is considered hazardous and cannot be returned. Not for sale in California, Delaware, and Maryland. Brand enenfeifei, Color Grey, Size 2 Piece Set, Volume 1 Gallons, Special Feature Not for sale in California, Delaware, and Maryland + +Price is $",200 +"How much does this cost to the nearest dollar? + +Road Top Wireless Carplay Retrofit Kit Decoder for BMW i3 I01 NBT System Year, Support Android Auto, Mirrorlink, Reverse Camera, Original Car Knob Control +Pre-shopping Notes When you buy, please check our website picture to make sure your car system is right. This Wireless Carplay Fits for BMW i3 I01 NBT System Not fit for EVO system. Wireless/Wired Apple Carplay It can work with Siri/ Maps/ Music/ Phone Call. 
Built-in mic for Siri function and Bluetooth call, use Maps(Support Google Waze and sygic map, etc), listen to your favorite songs using iTunes, Apple Music or other app and access to messages. Keep your original car knob and steering wheel control. Wireless/Wired Android Auto Use wireless or wired connection ( + +Price is $",95 +"How much does this cost to the nearest dollar? + +Gibson Performance Exhaust 5658 Aluminized Dual Extreme Cat-Back Performance Exhaust System +For the extremist who wants to take their truck to the next level, this dual bolt-on Cat back system is for you. This system exists behind the rear tires at an aggressive angle with a powerful exhaust tone. You will gain bold street looks with powerful dyno tuned and tested street performance gains. You can expect to experience gains on average of 15-20 horsepower. Gibson muffler provides a mean performance sound and complemented with polished Stainless Steel Tip. Easy bolt-on installation. No welding required. Backed by a Lifetime Limited Warranty. If you want Extreme, this system is it. 3 inch aluminized mandrel bent tubing Gibson muffler features a baffled and chambered design, + +Price is $",499 +"How much does this cost to the nearest dollar? + +Bella Tunno Happy Links - Baby Montessori Silicone Links & Soft Silicone Baby Toys, Developmental Toys for Playing, Teething, Gross Motor Skills, Color Recognition & More, Navy, SL07 +Introducing your new favorite product. Is it a toy? A teether? A link? Yes, it is. It’s pretty much magic what our Happy Links can do. We took the classic link and made it into the product we wish we had for our littles. Generously sized, easy to grip and wrapped in food-grade silicone, our links relieve little gums, keep toys and lovies attached and keeps kids entertained. Our Happy Links set includes 5 links for teething playing and organizing. 
Attach them to the stroller, rocker, play gym, or carrier to bring + +Price is $",18 +"How much does this cost to the nearest dollar? + +CANMORE H300 Handheld GPS Golf Device, Shot Distance Yardage Measuring, 40000+ Free Worldwide Preloaded Courses, Lightweight Golf Accessory for Golfers, Powerful Magnetic Clip for Golf Cart, Orange +WORLDWIDE COURSE DATA - Free course data preloaded for over 40,000 (and counting) golf courses around the world (NO subscription fees) - Contact Canmore to add new courses or suggest fixes. ***Notice Golf courses may change layout over time, when detected, the device will display “Incorrect Hole” and require course update. Please visit the CANMORE website for updates and new course information. ESSENTIAL FUNCTIONS YOU NEED - Manage your game, not your golf assistant! GPS course finder switches hole automatically and gives you easy-to-access distance to green ( + +Price is $",299 +"How much does this cost to the nearest dollar? + +DCPOWER AC Adapter Compatible Replacement for KORG PS60 PS-60 61-Key Portable Performance Synthesizer +New aftermarket, custom-made item (NON-OEM/NON-Original Equipment Manufacturer). Auto-Switching adapter can be used in the worldwide. Returns accepted within 30 Days. Quantity 1 unit of adapter. Connector type Round Barrel/Round tip Can be used to power up the device Input AC for using in the worldwide Output 9V DC Dimensions 3 x 2 x 1.5 inches, Weight 6 Ounces, Rank Musical Instruments Keyboard Power Supplies 5234, Is Discontinued No, Available October 24, 2013, Manufacturer DCPOWER, Brand Generic, Connector Type barrel connector, Special Feature Portable, Input Voltage 240 Volts, + +Price is $",88 +"How much does this cost to the nearest dollar? + +Sharp, Commercial Desktop Calculator, LCD +Resume function lets you recall data after shut-off. Extra-large digits for excellent readability. Dual solar/battery power for use in any lighting. Resume function lets you recall data after shut-off. 
Extra-large digits for excellent readability. Dual solar/battery power for use in any lighting. Dimensions 7.2 x 5.1 x 1 inches, Weight 6.4 ounces, model number Batteries 1 CR2 batteries required. (included), Rank Office Products Basic Office Calculators 1027, Is Discontinued No, Available December 27, 2004, Manufacturer SHARP ELECTRONICS, Brand Sharp, Color Black, Calculator Type Business, Power Source Battery Powered, Batteries 1 CR2 batteries required. (included) + +Price is $",32 +"How much does this cost to the nearest dollar? + +Melissa & Doug Lifelike Plush Stork Giant Standing Stuffed Animal (3+ Feet Tall) +This lifelike plush stork really delivers! A terrific way to welcome a new baby and a great companion for years to come, this striking silky white stork with black wingtips and realistic details is sure to turn heads. Standing an impressive three-plus feet tall, this lifelike bird’s soft, squeezable body covered with silky feathers encourage hugs and cuddles, while quality construction and a strong interior structure keep it standing proudly for years to come. The included baby bib The stork wears proclaims “welcome baby”. the stories long bright orange legs stand on an oval two-foot-long base for extra stability. Kids’ imaginations are sure to take flight with this beautiful feathered + +Price is $",25 +"How much does this cost to the nearest dollar? + +Sony SSCS8 2-Way Center Channel Speaker with Bookshelf Speaker System and Subwoofer Bundle (3 Items) +Equipped with two 4 woofers and a 1 tweeter, the Sony SS-CS8 2-Way Center Channel Speaker handles 145W of peak power. The speaker's woofers use a mica-reinforced diaphragm, the upper surface of which is fashioned to deliver supple and faithful sound quality, while the bottom layer is designed to provide a powerful bass response. The cabinet of the SS-CS8 is made from wood, which is designed to provide a natural resonance, and its bass reflex construction will give directionality to the low frequencies. 
The speaker's crossover network is mounted directly to the cabinet for vibration isolation, which is intended + +Price is $",95 +"How much does this cost to the nearest dollar? + +ASUS Chromebook CX1, 14 Full HD NanoEdge Display, Intel Celeron N3350 Processor, 64GB eMMC, 4GB RAM, Chrome OS, Transparent Silver, +ASUS Chromebook CX1400 is made for boosting productivity and having more fun while on the move — all day, every day. This lightweight, ultraportable device is powered by Intel processor and gives you the freedom of up to battery life. The slim-bezel design fits more screen into the compact chassis for easy multitasking and incredibly immersive entertainment, and the device is your gateway to the best of Google, including the rich library of apps for work or play on the Google Play Store. With speedy performance, robust security and intuitive features, ASUS Chromebook CX1 is ideal for anyone + +Price is $",440 +"How much does this cost to the nearest dollar? + +FiiO X7 32GB Hi-Res Lossless Music Player, Titanium +FiiO X7 High Resolution Audio Player FiiO X7 High Resolution Audio Player- Currently supports Music Player function only. DAC and other features will be available through future firmware upgrade Dimensions 2.52 x 5.12 x 0.65 inches, Weight 7.8 ounces, model number FIIO X7, Rank Electronics MP3 & MP4 Players 2510, Is Discontinued No, OS Android 4.4.4, RAM 32 GB, Connectivity technologies Aux, Special features Hi Res Audio, Other display features Wireless, Color Titanium Blue, Manufacturer FiiO, Available November 30, 2015, Brand FiiO, Model Name X7, + +Price is $",60 +"How much does this cost to the nearest dollar? + +TORRO Leather Case Compatible with iPhone 14 – Genuine Leather Wallet Case/Cover with Card Holder and Stand Function (Red) +COMPATIBILITY – The TORRO leather iPhone 14 case with card holder is designed and crafted exclusively for iPhone 14. 
The precision fit ensures full, unrestricted access to the screen, camera, buttons and charging port. GENUINE LEATHER - TORRO are a UK company specialising in luxury leather goods handcrafted from premium cowhide leather. The top-grain leather used is sourced from the finest tanneries in the US and undergoes minimal treatment in order to preserve the natural properties and appearance of TORRO luxurious leathers. SHOCKPROOF – The folio case features a unique TORRO durable TPU frame that has been formulated to aid shock absorption, + +Price is $",45 +"How much does this cost to the nearest dollar? + +Universal Air Conditioner KT 1031 A/C Compressor and Component Kit +UAC A/C Compressor and Component Kit Brand New, OE replacement UAC branded Compressor Kit 100% Guaranteed Fit! Add your car (year/make/model) to Amazon's garage to confirm Premium ISO/TS 16949 quality; tested to meet or exceed OEM specifications Includes compressor & clutch, drier / accumulator, expansion device, 8oz bottle of PAG oil, seal kit; compressor may come charged with shipping oil to keep the part lubricated during transit - drain and replace according to your system's requirements Product is backed by industry leading warranty Manufacturer UAC, Brand UAC, Model KT 1031, Weight 17.9 pounds, Dimensions 17 x 16 x 12 + +Price is $",65 +"How much does this cost to the nearest dollar? + +Street Series Stainless Performance Cat-Back Exhaust system +Made in the USA and engineered to last, for those seeking increased performance and better economy, MagnaFlow MF Series Performance Exhaust systems deliver the smooth deep sound you want and the wide-open performance power you need. Our exhaust systems feature straight-through flow designs for the ultimate in unrestricted horsepower and torque for big power while maintaining exhaust efficiency. These systems are an engineered balance of interior and exterior noise levels and are tested against SAE j1169 standards. 
great quality and sound Manufacturer MagnaFlow, Brand MagnaFlow Exhaust Products, Model 17870, Weight 25 pounds, Dimensions 58.75 x 13.75 x 19 inches, model number 17870, Exterior Machined, Manufacturer Part 17870, Rank Automotive Automotive Replacement + +Price is $",260 +"How much does this cost to the nearest dollar? + +Lenovo IdeaPad 3 Laptop, FHD (1920 x 1080) AMD Ryzen 5 3500U 8GB DDR4 RAM, 256GB SSD, AMD Radeon Vega 8 Graphics Windows 10, Abyss Blue (Renewed) +14 FHD TN Anti-glare, Ryzen 5 3500U Mobile Processor - 3.80 GHz) 256GB SSD, 8GB DDR4 SDRAM 180 degree hinge, WiFi and Bluetooth 5.0 720p HD Webcam with Dolby Audio dual speakers, 4-in-1 Media Card Reader 2 x USB 3.1 | 1 x USB 2.0 | 1 x HDMI | headphones, Windows 10 in S mode Brand Lenovo, Model Name Lenovo Ide + +Price is $",360 +"How much does this cost to the nearest dollar? + +Access Bed Covers TonnoSport - Roll-Up Tonneau Cover - Compatible with Toyota Tundra 6ft. 6in. Bed (w/o Deck Rail) +Tonneau Cover TONNOSPORT Roll-Up Cover TONNOSPORT Roll-Up Cover; Roll-Up; Without Deck Rail;FEATURES Gives You A Sleek Low Profile Look Gives You A Sleek Low Profile Look Compatible With Bed Rails/Bed Caps/Tailgate Protector Compatible With Bed Rails/Bed Caps/Tailgate Protector Quick Clamp On Installation Quick Clamp On Installation Lockable/Protects Your Cargo Lockable/Protects Your Cargo Complete Bed Usage When Open Complete Bed Usage When Open No Need To Remove No Need To Remove Tailgate Stays Operational Tailgate Stays Operational 2 Year Warranty 2 Year Warranty + +Price is $",55 +"How much does this cost to the nearest dollar? + +G.I. JOE Hasbro 3 3/4 Wave 5 Action Figure SGT. Flash (Laser Rifle Trooper) +SGT. FLASH is highly skilled in many aspects of electronic technology and is capable of equipment repair in the field. His specialized education includes electronics school, chemical school, and covert electronics. He is a qualified expert with the M-16, and (shoulder laser rifle). 
Celebrate 25 years of the ultimate action team with this articulated action figure! Display your action figure on the included display base! Figure also comes with a weapon! Twenty-fifth anniversary action figure has detailed styling and comes with a weapon and a display base! Ages 5 and up. Dimensions 5.12 x 1.57 x 5.51 inches, Weight + +Price is $",29 +"How much does this cost to the nearest dollar? + +T&S Brass Double Pantry Faucet, Wall Mount, 8 Centers, 6 Swing Built in Stops +T&S Brass 8 Wall Mount Mixing Faucet, Eterna Cartridges, Lever Handles, 6 Swing Nozzle, Built-In Stops & 1/2 NPT Female Inlets. Package Dimensions 9 L x 4 H x 14 W (inches) Package Weight 5.11 pounds Country of Origin United States Part Number Brand T&S Brass, Mounting Type Wall Mount, Finish Type Polished, Color Brass, Handles 1, Included Components Nozzle, Instruction Manual, Handle Type Lever, Installation Type Single Hole, Dimensions LxWxH 13.3 x 8.8 x 3.7 inches, Handle Material Brass, + +Price is $",65 +"How much does this cost to the nearest dollar? + +ZTUOAUMA Fuel Injection Pump Compatible with Cummins Engine M11 N14 QSM11 ISM11 +Part Number Application Models Compatible with Cummins Diesel Engine M11 N14 QSM11 ISM11 Note Please verify the part number and the detailed parts on pumps between our pictures before buying Warranty Returnable for 6 Months and Changeable for 1 Year (return and change for free) Direct replacement with strict and full test in factory to ensure the long durable service life Brand ZTUOAUMA, Fit Type Vehicle Specific Fit, Vehicle Service Type Truck, Style Fashion, Auto Part Position Rear, Gas Type Diesel, Operation Mode Mechanical, Manufacturer zt truck parts, Weight 11.51 pounds, Dimensions 9.92 x 9.06 x 7.6 + +Price is $",250 +"How much does this cost to the nearest dollar? + +Hp Prime Graphing Calculator Ii +Hp Prime Graphing Calculator Ii IB Diploma Programme exam approved Sleek, slim, brushed metal design that looks great and performs even better. 
Keep the calculator protected when it's not in use with a slide-on cover Enjoy a feature-rich calculating experience with familiar HP alphanumeric keypad and a large diagonal, multi-touch display Lithium-Ion rechargeable battery, 256 MB flash memory Unique STEM ecosystem with HP Prime Graphing Calculator, HP Prime Wireless Kit1, and HP Connectivity Kit Dimensions 3.66 x 0.65 x 7.28 inches, Weight 8 ounces, model number Batteries 1 Lithium Ion batteries required., Rank Office Products 27247, Basic Office Calculators 79, Available July 10, 2019, + +Price is $",39 +"How much does this cost to the nearest dollar? + +Lowrance Nmea 2000 25' Extension Cable +Lowrance n2k extension cable Red plugs NMEA 2000 extension cable Mfg.# Lowrance connectors. Package Dimensions 10 L x 3 H x 5 W (inches) Country of Origin Mexico Part number For use with LGC 3000 and red NMEA network Dimensions L x W x H 9.92 x 4.25 x 3.23 inches, Weight 0.79 Pounds, Dimensions LxWxH 10 x 5 x 3 inches, Weight 0.32 Kilograms, Brand Name Lowrance, Model Name Color Red, s 1, Manufacturer Lowrance, Part Model Year 2015, Included Components Lowrance Nmea + +Price is $",35 +"How much does this cost to the nearest dollar? + +Jeep Genuine Accessories Hood Lock +Hood lock rivits on using existing holes. Self codes to the ignition key. Same as standard in Europe, meets Thatchem requirements. When you select a genuine OEM part you can rely on the high quality and effectiveness of the product and brand without having to guess if the product will work in sequence with your vehicle. Protecting your investment is important and choosing the right parts can be challenging. Stick with what you know and choose a genuine OEM part. 
Fits Wrangler Hood lock secures underhood items from theft Rivits into existing holes and automatically codes itself to the vehicle ignition key during installation Same as the production hood lock for European markets Manufacturer Jeep, Brand Jeep, Model Weight 3.7 pounds, Dimensions 8.2 x 7.8 + +Price is $",65 +"How much does this cost to the nearest dollar? + +GODOX CB-06 Hard Carrying Case with Wheels +Godox CB-06 Hard Carrying Case with Wheels Carrying/Transport Options Dual connecting straps Top handle Wheels Dimensions 94.0 x 34.0 x 25.0cm (37.01 x 13.39 x 9.84 ) Dimensions 41.25 x 16.25 x 12.5 inches, Weight 7.5 pounds, model number CB 06, Rank Tripod & Monopod Cases 13, Is Discontinued No, Available August 24, 2017, Manufacturer Godox, Language English, Brand GODOX, Color Black, Closure Type Zipper, Pattern Solid, Dimensions LxWxH 41.25 x 16.25 x + +Price is $",75 +"How much does this cost to the nearest dollar? + +Au-Tomotive Gold, INC. Ford Black Valet Key Chain +Milled alloy black finish with easy release spring-loaded key ring for valet parking. Laser cut engraved logo will never fade. Showing OEM style car logo on one side. It is about 4 long. Brand new Official licensed product. Milled alloy black finish with easy release spring-loaded key ring for valet parking. Laser cut engraved logo will never fade. Showing OEM style car logo on one side. It is about 4 long. Brand new Official licensed product. Manufacturer Au-Tomotive Gold, INC, Brand Au-Tomotive Gold, INC., Weight 1.44 ounces, Dimensions 4.3 x 2.1 x 0.6 inches, Manufacturer Part Rank Automotive Keychains 18749 + +Price is $",35 +"How much does this cost to the nearest dollar? 
+ +Snailfly Black Roof Rack Rail + Cross Bar Fit for Honda All New CRV CR-V (4pcs +FITMENT Roof Rack Cross Bars Fit For Honda CRV CR-V 2017 2018 2019 2020 2021 2022 Please make sure the fitment before your purchase 2 PACKAGES Package 1# 2pcs Roof Racks Package 2# 2pcs Crossbars Necessary Mounting Hardware Like Bolts Are Included. SPECIFICS 100% Brand New Smooth surface Item exactly as the picture showed High Quality Aluminum Alloy Long lasting & durable finish, suitable for all weathers INSTALLATION Please contact us via message if you need installation insturctions. FEATURES Low profile streamline design,efficiently reduce wind resistance and noise. Greatly increase overall + +Price is $",125 +"How much does this cost to the nearest dollar? + +KING SHA Anti Glare LED Track Lighting Heads (50W Eqv.) Compatible with Halo Pack +Stable performance dimming capabilities that work seamlessly with universal dimmers, allowing you to adjust the brightness smoothly from 10% to 100% without any flickering. Anti-glare design to provide soft and eye-friendly lighting. The glare-free grid helps to reduce eye strain and protect your vision. Compatibility with H-type circuit track systems, making them suitable for a wide range of track lighting applications. GU10 base with a twist and turn type, which makes it easy to change bulbs. The 7W dimmable MR16 bulb with a high color rendering index of 90+ and a color temperature of 3000K (50W equivalent) provides bright and vibrant illumination. Adjustable + +Price is $",180 +"How much does this cost to the nearest dollar? + +APS Compatible with Chevy Silverado 1500 Main Upper Stainless Steel Black 8x6 Horizontal Billet Grille Insert +INSTALLATION This is Bolt Over/Overlay/Bolton (Drilling Not Required) 8x6 Horizontal Billet grille insert. OE grille shell remains on the car after installation. 
CUSTOM FIT Compatible with Chevy Silverado 1500 Not for Z71 SPECS Each grille made from premium Stainless Steel and customized to fit the Main Upper of your vehicle. All necessary hardware and instruction are included. Grille insert only, logo or emblem, frame or shell is NOT included. PERFECT DESIGN Each grille made from premium stainless steel with black powder coated surface that offers resistance to oxidization. This grille enhances the visual appearance of your car. SATISFACTION GUARANTEED + +Price is $",110 +"How much does this cost to the nearest dollar? + +Wilwood Engineering Brake Caliper +Wilwood's D52 Front Caliper Kit is a direct bolt-on 2 piston replacement for the factory original single calipers on many GM Passenger Vehicles and Trucks. Forged billet aluminum bodies, stainless steel pistons, and competition style high-temperature seals put an end to the rust, bore pitting, and seal failures that plague the OE caliper design. D52 calipers provide low-maintenance performance and a huge weight savings with high temperature reliability for the street and track. D52 calipers mount in the stock location over stock rotors, use the original style OE D52 brake pads and an OE banjo bolt brake line mounting. Calipers can be used with most wheels that clear the OE calipers. The front calipers with 2 + +Price is $",90 +"How much does this cost to the nearest dollar? + +ACDelco Gold Starter, Remanufactured (Renewed) +ACDelco’s Professional Remanufactured Starters are the high quality replacement ideal for many vehicles on the road today. ACDelco’s Professional Remanufactured Starters have new bronze sintered and oil-impregnated bushings. Solenoid contacts are new with copper terminals and plated hardware. Remanufacturing starters is an industry standard practice that involves disassembly of existing units, and replacing components that are most prone to wear with new components. 
Damaged and obsolete parts are replaced and are end of line tested to ensure they perform to ACDelco specifications. In addition, remanufacturing returns components back into service rather than processing as scrap or simply disposing of them. These starters will + +Price is $",110 +"How much does this cost to the nearest dollar? + +UWS Matte Black Heavy-Wall Aluminum Deep Angled Truck Tool Box with Low Profile, RigidCore Lid +UWS crossover truck tool boxes are the tried-and-true way of keeping your tools organized, on-hand and fully secure no matter where you and your truck roam. Each UWS tool box is built from extra-thick aluminum welded into a single-piece tub. This provides the tool box with reliable strength and helps keep the interior sealed off from the elements. Aluminum construction also makes the box highly resistant to corrosion for long-lasting use. To add even more strength to the crossover truck tool box, the lid features our patented RigidCore foam-filled design. Layered between two sheets of aluminum, this core greatly increases the structural integrity of the lid to prevent bending and warping and to ensure + +Price is $",50 +"How much does this cost to the nearest dollar? + +Dell Latitude E5440 14in Business Laptop Computer, Intel Core up to 8GB RAM, 256GB SSD, HDMI, DVDRW, WiFi plus BT, Windows 10 Professional (Renewed) +2018 Dell Latitude E5440 14 Business Laptop Computer, Intel Dual-Core up to 8GB RAM, 256GB SSD, HDMI, Bluetooth 4.0, WiFi Windows 10 Professional (CertifiedRefurbished) Operating System Microsoft Windows 10 Professional CPU Intel Core 1.9GHz up to 2.9GHz Screen 14 Memory 8 GB DDR3 Storage 256GB SSD Optical Drive DVD-Writer Graphics Card Intel HD Graphics 4400 Video Memory Shared memory Communication Gigabit LAN and WLAN CPU Type Intel Core i5 4 + +Price is $",350 +"How much does this cost to the nearest dollar? 
+ +(Plug and Play) Spare Tire Brake Light Wheel Light Brake Light for Wrangler JK JKU Red Light +FITMENT Fit for JK JKU with all 16 to 20 inch rim diameter wheels, works with 5x5, 5x4.5, 5x5.5 inch lug patterns. Plug & Play Package comes with instructions including the video link of installing and wiring. Just plug to the 3rd brake light.Easy to install, just plug and play, no need to splice the existing brake light wires. No broken wire installation. You can install the third spare light in few minutes. Braking Function Obvious and fast braking warning signal, lights up the inside of your spare when step on the brake, more red brightness and stronger penetration, easy To Be + +Price is $",89 +"How much does this cost to the nearest dollar? + +The Ultimate Roadside Rescue Assistant +The Ultimate Roadside Rescue Assistant is the rechargeable power source, air compressor, emergency light and phone charger no driver should be without. It features a 140W inverter to power 110V household appliances, plus a car battery jump starter, 150 PSI air compressor and a 5 LED work light. Keep one in your home or vehicle for peace of mind. The Ultimate Roadside Rescue Assistant is the rechargeable power source, air compressor, emergency light and phone charger no driver should be without. It features a 140W inverter to power 110V household appliances, plus a car battery jump starter, 150 PSI air compressor and a 5 LED work light. Keep one in your home or vehicle for peace of mind. Manufacturer Rally Manufacturing + +Price is $",155 +"How much does this cost to the nearest dollar? 
+ +Brand New 18 x 8.5 Replacement Wheel for Mercedes CLS500 CLS550 Rim 65371 +JWL/VIA Certifed Product ISO 9001 Certifed Product Replication Manufacturer WheelerShip, Brand Wheelership, Model OEM Replacement (Aftermarket), Weight 32.3 Pounds, Exterior Silver, Manufacturer Part Construction Rim Diameter 18 Inches, Rim Width 8.5 Inches, Bolt Pattern ( Holes) 5, Bolt Pattern (Pitch Circle Diameter) 112 Millimeters, Offset 28 Millimeters, Available April 24, 2014, Size 18 inch, Exterior Finish Silver, Wheel Size 18 Inches, Pitch Circle Diameter 112 Millimeters, Rim Size 18 Inches, Diameter 18 Inches, Vehicle Service Type Passenger Car + +Price is $",350 +"How much does this cost to the nearest dollar? + +Headlight Headlamp LH Left & RH Right Pair Set for Toyota Prius +For 10-11 Toyota Prius Headlight Headlamp Halogen LH & RH Pair Driver & Passenger Set DETAIL Assembly Type Composite Lens Color Clear Ballast Included No Manufacturer Part Number Mounting Hardware Included No Bulb Size Same as factory Bulb Type Halogen OE Number Bulbs Included No Certifications DOT,SAE Placement on Vehicle Left, Right Fitment Type Direct Replacement Headlight Style Factory Housing Color Chrome (Crystal) Fits Prius Headlight · 100% brand new and high quality · Fits both LH (Driver Side) & RH (Passenger Side) · Replaces dealer part numbers · Correct for models with Halogen Style Headlights · Do NOT fit models with HID (High Intensity Discharge + +Price is $",200 +"How much does this cost to the nearest dollar? + +Lilo And Stitch Deluxe Oversize Print Large 16 Backpack with Laptop Compartment - A19563 Multi-color +Send them off with awesome top quality and durable Backpack by KBNL! Our backpacks and accessories feature today's popular characters and designs. 
KBNL backpacks are as practical as it is stylish and include the following features Durable polyester exterior, Full interior lining, dual side pockets, front organizer pocket for additional accessory storage, Padded and adjustable shoulder straps, padded interior pocket which protects up to a laptop, Fully padded back panel - KBNL products are made with top quality material and workmanship. Front organizer pocket for additional accessory storage Padded and adjustable shoulder straps, fully padded back panel, padded interior pocket which protects up to a laptop Dimensions 5 x 12 x + +Price is $",29.99 +"How much does this cost to the nearest dollar? + +AC Compressor & A/C Clutch For Hyundai Accent 2006 2007 2008 2009 - BuyAutoParts NEW +Engineered for superior durability, backed by a one year, unlimited mileage warranty Guaranteed Exact Fit for easy installation 100% BRAND NEW, premium ISO/TS 16949 quality - no core deposit or return required! Make sure you flush the system thoroughly and replace the drier filter along with the compressor for better long-term reliability, or consider one of our AC kits that includes everything you need! Fits Hyundai Accent Manufacturer BuyAutoParts, Part Weight 16 Pounds, Dimensions 12 x 11 x 10 inches, Quantity 1, Rank Automotive Automotive Replacement Air Conditioning Compressors 9735, Available April 25, 2015, + +Price is $",160 +"How much does this cost to the nearest dollar? + +House Of Troy Pinnacle Collection Portable Halogen Wall Lamp, Antique Brass +From the Manufacturer The House of Troy Pinnacle Collection Portable Halogen Wall Lamp shows that hand-craftsmanship is a time honored tradition, as alive today as the land itself. In this tradition, House of Troy carefully crafts each light for you by hand, to the highest quality standards. This swing arm wall lamp will create a stunning presence in any room and works well with many styles of decor. 
Showcasing the classic lines of this lamp and cut from the highest quality solid brass, the has an antique brass finish achieved by coloring the solid brass with an application of acid oxide. The finish is then darkened and partially rubbed away, leaving dark highlights throughout. All oxidized finishes are protected with a finish coat of matte + +Price is $",40 +"How much does this cost to the nearest dollar? + +Juno T29 WH Floating Electrical Feed Single Circuit Track, 120 Volts, White +Floating Electrical Feed for Juno Single Circuit Track Permits mounting at any point of Juno single circuit track under the outlet box. Includes floating connector and outlet box cover plus extra track dead end.. Floating Electrical Feed For Juno 1 Circuit Track - White Lighting Rail. 1- Juno Floating Electrical Feed T29Wh For Juno Single Circuit Track Brand Name Juno Lighting Product Dimensions 6.0 X 3.0 X 3.0 Country Of Origin China Manufacturer Acuity Brands Lighting, Part Weight 4.2 ounces, Dimensions 6 x 3 x 3 inches, Country of Origin China, model number T29 WH, Color White, Style Voltage, Finish White, + +Price is $",60 +"How much does this cost to the nearest dollar? + +Sherman GO-PARTS - for Toyota Avalon Side View Mirror - Right (Passenger) Replacement 2014 2015 +Sherman Replacement Part Compatible with TOYOTA AVALON Right Mirror outside rear view (Partslink Number Sherman Replacement Part Compatible with TOYOTA AVALON Right Mirror outside rear view (Partslink Number Manufacturer Sherman, Brand Sherman, Model Weight 3.35 pounds, Dimensions 17.01 x 11.73 x 6.69 inches, model number Exterior Painted, Manufacturer Part ABPA Partslink Position Rear, Lift Type Manual, Rank Automotive Automotive Exterior Mirrors 21172, Available November 8, 2021, Auto Part Position Rear, Mounting Type Windshield Mount, Included Components Mirror, Operation Mode Manual, Shape Rect + +Price is $",80 +"How much does this cost to the nearest dollar? 
+ +Roland RPU-3 Electronic Keyboard Pedal or Footswitch, 3 Pedal +Product Description Combining three pedals into one convenient and clutter-free unit, the Roland RPU-3 offers a real grand piano pedaling experience. With separate 1/4 outputs for each of its three pedals, the RPU-3 is compatible with keyboards such as Roland’s FP-90, FP-60, and pianos. In addition to providing the same pedal configuration as a grand piano, the RPU-3 also provides hands-free control of various instrument functions, such as selecting registrations or activating vocal effects on the FP-90. From the Manufacturer Combining three pedals into one convenient and clutter-free unit, the Roland RPU-3 offers a real grand piano pedaling experience. With + +Price is $",45 +"How much does this cost to the nearest dollar? + +Rockland VMI14 12,000 Pound 12 Volt DC Electric Integrated Vehicle Winch Kit with a Synthetic Rope and Remote Accessory for Jeep, Truck, and ATV Recovery +MULTI-PURPOSE WINCH Electric vehicle winch with a Hawse fairlead and synthetic rope provides car recovery in tough situations for trucks and SUVs CONVENIENT REMOTE OPERATION Wired remote controller power switch allows for retracting the rope for winching as desired DC MOTOR Series-wound motor stays cooler during longer pulls to increase continuous operation time HIGH-PERFORMANCE GEAR SYSTEM planetary gear system with free spooling provides a fast line speed with a fast line-out FEATURES AND SPECIFICATIONS Voltage detection and stall load protection capabilities flash red and blue LED lights to warn and alert you; Color Black; Dimensions (L + +Price is $",60 +"How much does this cost to the nearest dollar? 
+ +Max Advanced Brakes Elite XDS Front Cross-Drill & Slots Rotors with Elite Max Brake Pads +Max Advanced Brakes Elite XDS FRONT brake kit is exceptional in every way to meet the demanding braking needs for multiple driving styles, road and weather conditions FRONT brake kit with Elite XDS brake rotors are finished with a special coating to prevent corrosion & rust and to protect against moisture and salt. Brake rotors are cross-drilled and slotted to dissipate heat and keep your brakes in perfect condition at all times. Elite Max brake pads and hardware clips included Max Advanced Brakes has been providing replacement brake kits, brake rotors and brake pads for over 10 years and we've always prioritized the safety and satisfaction of our customers. Our brakes are designed to be safe and durable + +Price is $",80 +"How much does this cost to the nearest dollar? + +Quality-Built 11030 Premium Quality Alternator +Quality-Built Alternators are remanufactured for a perfect fit. Housings are 100 percent blasted clean, all mounting threads inspected, re-tapped for easy installation and consistent torque. Terminals are of 100 percent OE-quality. High-temp insulators make connections secure and reliable. Quality-Built alternators are re-designed to operate with every turn of the key for reliable performance. Rotors are electronically tested and coated with high dielectric insulation to ensure maximum durability and charging performance. Bearings are inspected or new, with high-temperature grease for reduced heat and friction. Stators are electronically tested for maximum insulation quality and phase balance. Rectifiers are load tested to ensure alternator durability and charging performance. Brushes and springs are new + +Price is $",110 +"How much does this cost to the nearest dollar? 
+ +Lucida LG-510 Student Classical Guitar, Full Size +The perfect guitar for any beginner, the Lucida Student LG-510 features Gotoh tuners for easy tuning, nylon strings for low string tension and a classic design available in multiple sizes. White Wood Top, Back and Sides Open Gear Gotoh Tuning Machines Nato Neck Hard Maple Fretboard Multi-Colored Rosette Weight 3.7 pounds, Dimensions 39 x 15 x 4 inches, model number Rank Musical Instruments Classical & Nylon-String Guitars 336, Is Discontinued No, Available May 3, 2010, Back Material White Wood, Body Material Wood, Color Name Multi-colored,White, Fretboard Material Maple Wood, String Material Nylon, Top Material White Wood, + +Price is $",160 +"How much does this cost to the nearest dollar? + +Longacre Aluminum Turn Plates +Longacre is an established brand name in the racing industry and is recognized for dedication to quality, innovation and customer satisfaction. Check out our comprehensive line of race scales, alignment tools, racing gauges and other products. Whether you are into stock, modified, drag, go kart, off-road, sprint or RC car racing, we'll provide you with the quality racing parts you deserve. The free floating in 2 directions eliminates bind It reads to 1/2° - Degrees can be zeroed with the car on The low profile design means that its only 1 tall Can also be used on top of scale pads Has a weight capacity of 1,500 lbs. per scale Manufacturer Longacre, Brand Longacre, Model Longacre Racing Products, Weight 31 + +Price is $",300 +"How much does this cost to the nearest dollar? + +Motion Pro Adjustable Torque Wrench Adapter +Torque any fastener with a combination wrench or Allen wrench. Will work with 6 millimeter (1/4 inch) through 19 millimeter (3/4 inch) combination wrenches and with 6 millimeter and 8 millimeter Allen wrenches. 
Torque any fastener with a combination wrench or Allen wrench Brand Motion Pro, Material Alloy Steel, Dimensions LxWxH 6 x 3.9 x 1.1 inches, Weight 0.5 Pounds, Quantity 1, Head Style Fixed Square, Hex, Finish Type Black Oxide Finish, Torque 90 Foot Pounds, Operation Mode Mechanical, Manufacturer Motion Pro, Model Dimensions 6 x 3.9 x 1.1 + +Price is $",120 +"How much does this cost to the nearest dollar? + +Glyph Thunderbolt 3 NVMe Dock (0 GB) +Perfect fit ultra slim case for iphone 6 plus. Lightweight, and easy access to all buttons Snap-on case Strong packaging to protect the goods from possible damage High quality and durable protection cover. Brand Glyph Production Technologies, model number Weight 1.89 Kilograms, Dimensions 9.4 x 3.8 x 0.9 inches, Dimensions LxWxH 9.4 x 3.8 x 0.9 inches, Color Black, Manufacturer Glyph Production Technologies, Is Discontinued No, Available June 6, 2015, Rank Computers & Accessories Laptop Docking Stations 2945, Hardware Interface USB, Thunderbolt, Compatible Devices iPhone 6 Plus, Total USB Ports 3, Dimensions L + +Price is $",100 +"How much does this cost to the nearest dollar? + +TOYO Open Country MT Performance Radial E/10 129P +TOYO Open Country MT Performance Radial E/10 129P Country of Origin Japan The Package Height of the Product is 11.8 inches The Package Length of the Product is 34.5 inches The Package Width of the Product is 34.5 inches Fit type Universal Fit Load capacity 4080 pounds Brand Toyo Tires, Size E/10, Rim Size 18 Inches, Section Width 295 Millimeters, Tire Aspect Ratio 70.0, Load Index 129, Speed Rating P, Load Capacity 4080 Pounds, Tread Depth 19.4 32nds, Tread Type Non-Directional, Tire Diameter 25, Weight 58 pounds, Manufacturer Toyo + +Price is $",300 +"How much does this cost to the nearest dollar? 
+ +Razer Seiren X USB Streaming Microphone and Razer Kiyo Streaming Webcam +Bundle Contents 1x Kiyo Webcam, 1x Seiren X Microphone Super Cardioid Pickup Pattern Sound is recorded at a tighter angle, reducing unwanted background noise and providing crisp clear audio Designed for Streaming Supports video and audio recording in 720p 60 FPS / 1080p 30 FPS; Streamlabs certified and compatible with popular platforms like OBS and XSplit Convenient, Built In Lighting An attached, 5600K daylight balanced ring light around the camera keeps subjects evenly lit without the hassle of additional lighting equipment Brand Razer, Connectivity Technology USB, Color Black, Video Capture Resolution 1080p, 720p, Lens Type Zoom, Form Factor Compact + +Price is $",110 +"How much does this cost to the nearest dollar? + +Happy Birthday to Dad From Your Daughter Greeting Card - I've Always Known I Could Depend On Your Love and Support No Matter What +Greeting Card Includes Envelope Front From Your Daughter - Ever since I was a little girl, you've been such an important part of my life... I've always known I could depend on your love and support... no matter what. Inside On your birthday, if I could give you anything in return for all you've given me, it would have to be the love I always hold in my heart for you. Manufacturer Greeting Card, Brand Greeting Card, Weight 1.6 ounces, Dimensions 9 x 7 x 0.1 inches, Is Discontinued No, Pre-printed happy birthday, s 1, Manufacturer Part GC, Rank + +Price is $",6 +"How much does this cost to the nearest dollar? + +Little Tikes My Real Jam First Concert Set with Electric Guitar, Drum and Keyboard, 4 Play Modes, and Bluetooth Connectivity - for Kids Ages 3+ +The My Real Jam™ First Concert Set lets kids harness their inner musician. Four play modes—Play with the Band, Free Play; Solo Jam; Play Any Song with Bluetooth® —provides countless hours of musical fun. 
The realistically designed Electric Guitar, Drums and Keyboard are packed with features, while the packages double as reusable instrument cases, perfect for storing the instruments or for hitting the road as an aspiring musician. BECOME A SUPERSTAR – Lets kids jam their way to rock star status with a perfect combo of musical play and pretend play PLAY ANY SONG WITH BLUETOOTH - Sync with any Bluetooth enabled device to play along + +Price is $",19.99 +"How much does this cost to the nearest dollar? + +Studio M Peace and Harmony Art Pole Community Inspirational Outdoor Decorative Garden Post, Made in USA, 60 Inches Tall +Impactful. Beautiful. Unique. An Art Pole is an impactful way to bring beautiful artwork into any landscape. With a patented, state-of-the-art design and exceptional quality, it will be at the heart of your garden for years to come. Art Poles are easy to install - all hardware is included and no digging is necessary. Made in the USA from ultra-durable, maintenance free PVC, each Art Pole features vivid artwork with an expected 5-year fade-resistance (this will vary by regional climate and sun exposure). U.S. Patent No. U.S. Patent No. U.S. Patent No. Art Poles are created by the team of passionate people + +Price is $",110 +"How much does this cost to the nearest dollar? 
+ +MyVolts 12V Power Supply Adaptor Compatible with/Replacement for HP Scanjet 3500C Scanner - US Plug with Extension and ON/Off Switch +Need to power your HP Scanner Scanjet 12V high-quality power adapter is compatible with the HP Scanjet 3500C Scanner.The plug fits a US 2-pin wall power socket.This power adaptor is designed to meet the power specification of the HP Scanjet 3500C Scanner - correct voltage, amperage and tip size.It meets and exceeds all US safety standard, features overvoltage, overcurrent and short circuit protection to protect your device, and is energy efficient.Also included in the Premium option is a handy in-line on / off switch, AND a 3 meter (10 feet) extension cable.Power + +Price is $",35 +"How much does this cost to the nearest dollar? + +Dell Latitude 7212 Rugged Extreme Tablet, 11.6 inch FHD Touch LCD, Intel Core 8GB Ram, 128GB SSD, WiFi, GPS, Windows 10 Professional (Renewed) +This Certified Refurbished product is tested and certified to look and work like new. The refurbishing process includes functionality testing, basic cleaning, inspection, and repackaging. The product ships with all relevant accessories, a minimum 90-day warranty, and may arrive in a generic box. Only select sellers who maintain a high performance bar may offer Certified Refurbished products on Amazon.com Intel Core 7th Generation Processor (Dual Core, 3MB + u-blox NEO-M8 GPS card FHD Outdoor-Readable Glove-Capable Touchscreen w/ Gor + +Price is $",220 +"How much does this cost to the nearest dollar? + +Covermates Contour Fit Car Cover - Light Weight Polyester, Weather Resistant, Elastic Hem, Vehicle Covers-Khaki +From freezing rain and snowstorms to harsh sunlight and bird droppings, your vehicle faces it all. Spring brings bouts of rain followed by showers of pollen, leaving your vehicle a yellow, sticky mess. 
Our WeatherTite Prime covers are made of 300D stock-dyed polyester designed for climates with moderate humidity, moderate sunlight, heavy wind gusts, and heavy rain and snowfall. WeatherTite Prime covers provide excellent protection from dirt, dust, pollen, rain, and anything else nature has to throw at it. Hidden grommets are placed along the bottom of the cover, allowing optional cable locks to keep the cover secure and safe. An extra + +Price is $",199 +"How much does this cost to the nearest dollar? + +Westin Black HDX Grille Guard fits Ram 2500 3500 (Excl. Power Wagon) +The HDX Grille Guard is the ultimate in extreme truck gear. Its a fully welded grille guard that features full wraparound wings made of heavy duty 2 diameter tube. Uprights are finished and protected with extra wide rubber that is 1/8 thick and 2 3/4 wide resulting in a solid clean look. The full punch plate grille protects the vehicle's grille area. HDX Grille Guards are available in stainless and black powder coat finish. PERFECT FIT Direct fit for Ram 2500 3500 (Excl. Power Wagon) 2 inch tube, full wrap around wings Mount kit and hardware included Full punch plate grille solid construction + +Price is $",160 +"How much does this cost to the nearest dollar? + +Fieldpiece JL2 Job Link Wireless App Transmitter Bluetooth +With the JL2 transmitter and the job link app, you can start running your jobs through your mobile device. Fill out inspection checklists, view live measurements, gather in-depth Diagnostics, and adjust systems to live data. All reports can be emailed to customers and office, as well as saved in the cloud for access at anytime. The JL2 transmitter receives measurements from any Fieldpiece Wireless manifold and the Fieldpiece Wireless dual in-duct Psychomotor (SDP2) via radio frequency for extra distance - up to 100' from instrument to phone. Then the JL2 transmitter converts all live measurements and data to Bluetooth connection with your mobile device. made in United States. 
Manufactured by Fieldpiece instruments Inc. Sman digital + +Price is $",99 +"How much does this cost to the nearest dollar? + +hansgrohe Talis S Modern Premium Easy Clean 1 9-inch Tall Bathroom Sink Faucet in Chrome, +Design With a range of models and styles, paired with the quality and design you expect from hansgrohe, dream bathrooms become a reality. Bath faucets by hansgrohe exude beautiful design with superior performance and durability. Pick your desired faucet, then browse the entire product suite for complementary accessories. German engineering ensures a lifetime of consistent and dependable operation. Maintenance Products that function perfectly are essential. To ensure that they do, every hansgrohe product 100% air tested in production. hansgrohe faucets feature a silicone aerator that optimizes water flow performance, resists mineral deposit build-up, and is designed to be easily wiped clean. Installation Can be installed in + +Price is $",60 +"How much does this cost to the nearest dollar? + +G-Technology G-SPEED eS PRO High-Performance Fail-Safe RAID Solution for HD/2K Production 8TB +Product Description High-Performance, Fail-Safe RAID Solutions for HD/2K Production The new G-SPEED eS PRO from G-Tech provides professional content creators better than Fibre-Channel performance for demanding post production applications at a fraction of the cost. The compact and whisper quiet G-SPEED eS PRO features mini-SAS connectivity to a high performance PCIe x8 IOP RAID controller that supports RAID levels or 6. A single G-SPEED eS PRO enclosure with four 7200 RPM, SATA II drives in RAID 0 mode supports multi-stream ProRes 422 HQ playback and a single-stream of uncompressed 10-bit HD. Two units + +Price is $",399 +"How much does this cost to the nearest dollar? + +DreamLine Shower Door, 56-60 W x 72 H, Chrome +The DreamLine Mirage-X frameless sliding shower or tub door is the epitome of simple elegance with a modern flair. 
The remarkably innovative headerless design creates an unobstructed and open view for your shower. The Mirage-X shower door will complete any bathroom space with a look of luxury and style. DreamLine exclusive ClearMax water repellant and stain resistant glass coating adds superior protection from stains and is nearly maintenance-free. IMPORTANT! All measurements should be taken only AFTER walls are finished (tile, back walls, etc. ) Model Size 56 - 60 in. W x 72 in. H; Walk-in Opening 22 to 26 in. Configuration consists of a Sliding Door and a Station + +Price is $",89 +"How much does this cost to the nearest dollar? + +Sanctuary Square Backplate Finish Oiled Rubbed Bronze, Size 1.25 H x 1.25 W x 0.06 D +Finish Oiled Rubbed Bronze, Size 1.25 H x 1.25 W x 0.06 D Features -Screw pack M4. -Base material Zinc alloy. -Lifetime warranty. -Sanctuary collection. Dimensions Size 1 H x 1 W x 0.06 D - Overall Height - Top to Bottom -1. Size 1 H x 1 W x 0.06 D - Overall Width - Side to Side -1. Size 1 H x 1 W x 0.06 D - Overall Product Weight -0.1 lbs. Size 1.25 H + +Price is $",35 +"How much does this cost to the nearest dollar? + +Pelican Protector 1750 Long Case - Multi-Purpose Hard Case with Foam - Tripod, Camera Equipment, Sportsmans Rifle Case, Electronics Gear, and More (Black) +Sensitive equipment needs protection, and since 1976 the answer has been the Pelican Protector Case. These cases are designed rugged, and travel the harshest environments on earth. Against the extreme cold of the arctic or the heat of battle, Pelican cases have survived. Made in the USA, these tough cases are designed with an automatic purge valve, that equalizes air pressure, a watertight silicone O-ring lid, over-molded rubber handles and stainless steel hardware. PREMIUM HARD CASE In use with camera and film professionals, military, law enforcement, and hunters worldwide as a rifle case. + +Price is $",55 +"How much does this cost to the nearest dollar? 
+ +Brock Replacement Driver and Passenger Halogen Headlights Headlamps Compatible with +Meets all OE specifications, with DOT stamp Exact replacement for stock assembly New, clear lenses ensure full illumination and maximum safety Lens and housing included 1-Year Limited Warranty Manufacturer Brock, Brand Brock, Model Replacement Headlight Assemblies, Weight 16 pounds, Dimensions 23 x 23 x 15 inches, Country of Origin Taiwan, model number Is Discontinued No, Manufacturer Part OEM Part 3C0 941 006 AE, ABPA Partslink Position Rear, Front, Bulb Type Halogen, Special Features Waterproof, Rank Automotive Automotive Headlight Assemblies 16731, Available September 9, 2022, Specific Uses For Product Head Lights, Light Source Type Halogen, Vehicle Service Type Car + +Price is $",120 +"How much does this cost to the nearest dollar? + +Carlinkit Ai Box Mini, Android 11, Multimedia Video Magic GPS,Wireless Caplay & Wireless Android Auto, Only Support Car with OEM Wired CarPlay +Compatible models recommended Car Links Your Phone Over The Air, Applicable to cars of 2015 and above. Please check the listing page before purchasing. If yours is not in the list, please ask for help from carlinkit. Multiple Online service Real-time online Maps will guide you at any time, either by connecting to a mobile phone hotspot, or by inserting a SIM card. Both of these allow you to enjoy the convenience. It also supports voice assistants, adding a new way to free your hands. SIM & TF Card & Type-C slot Simple card slot design makes everything clear at a glance. Support NANO SIM card + +Price is $",100 +"How much does this cost to the nearest dollar? + +StarDot YouTube Live Stream Camera Bundle, Gray +is a standalone live streaming camera which is compatible with YouTube live streaming and Facebook Live. This camera has been thoroughly tested for continuous 24/7 live streaming. 
It will broadcast high-quality video directly to YouTube without assistance from computers, cellphones, or third-party servers. Copy and paste a YouTube stream name/key from your YouTube account to the camera configuration page, and you're streaming in less than a minute. Easy to set up - connect the camera to your network, and get the stream name/key from YouTube or Facebook Live. Place it in the camera’s web setup page to start streaming. No need to open up additional network Ports in your router or modem settings. Live stream resolutions include HD 1080P, HD 720P, + +Price is $",110 +"How much does this cost to the nearest dollar? + +Atomic Compatible MERV 8 Carrier Replacement Furnace Filter - 2 Pack +The Atomic is a compatible filter fits the Carrier FILCAB mechanical air cleaner and MPKA series. This media filter is a whole house filter which is attached to the HVAC system. It has a MERV 8 filter efficiency value, which indicates how efficient the particles that can be trapped by the filter. The higher the rating, the finer the filtration and the fewer the particles that pass through it. To further increase filtration, it has a pleated rather than a flat surface, thereby increasing the filtering surface area. This efficiently traps airborne particles as small as 3 microns. An additional benefit is that an air filter will also extend the life of your heating and cooling system by making it work more efficiently by preventing the + +Price is $",50 +"How much does this cost to the nearest dollar? + +Bandai Awakening of S. H. 
s.h.figuarts star wars / force Obi-Wan Kenobi +SH Figuarts Star Wars Obi-Wan Kenobi (EpisodeI) about 155mm ABS & PVC painted action figure bandai star wars japan awakens Theme Action,Star Wars, Brand STAR WARS, Material Polyvinyl Chloride, Occasion Birthday, Dimensions 8\ L x 6\ W x 8\ H, Cartoon Character Star Wars, Room Type Office, Living Room, Bedroom, Pieces 1, Assembly Required No, s 1, Collection Name Action Figure, Shape Novelty, Manufacturer Bandai, Quantity 1, Weight 4.6 ounces, model number Rank Toys & Games Action Figures 44598, Is Discontinued + +Price is $",15 +"How much does this cost to the nearest dollar? + +Fit System 62135G Passenger Side Towing Mirror for Silverado/Sierra, 2500, 3500, Textured Black, Arrow Signal, Dual Lens, 1st Design, (no Power fold/Side Reflector/BLIS), fold, Heated Power +Passenger Side Towing Mirror for Silverado/ Sierra 1500, 2500, 3500, 1st design. Textured black, LED Arrow Signal and dual lens. Without power fold, side reflector and blind spot detection system. Foldaway. Heated Power. Towing Mirror glass is power adjustable. Convex Lens. Towing Mirror glass has heating capability to clear ice, snow and fog. Manual folding for additional clearance. Towing Mirror has the ability to extend. + +Price is $",88 +"How much does this cost to the nearest dollar? 
+ +Black Horse Black Aluminum Exceed Running Boards Compatible with GMC Terrain / Chevriolet Equinox +Black Horse Black Aluminum Exceed Running Boards compatible with GMC Terrain / Chevriolet Equinox Black Horse Off Road Aluminum Exceed Running Board - Features an all-black design with Chrome Trim/Compatible with GMC Chevrolet wide flat stepping surface Built with heavy-duty aluminum/Resistant to rust and corrosion for long-lasting use /All necessary hardware included Stripe design for a strong grip/Designed to look like a part of your vehicle/Easy installation, DIY instructions and all mounting hardware included Eases step in or out of vehicle// Manufacturer Black Horse Off Road, Brand Black Horse Off Road, Model Exceed Running Boards, Weight 40 Pounds, Dimensions 73 x 9 x 11 inches, Country of + +Price is $",70 +"How much does this cost to the nearest dollar? + +Dearsun Twinkle Star Color Night Light Plush Pillows Light up Night Stuffed Toys Perfect for Birthday (Orange) +% Polyester Polyester 100% Polyester Size 13.8 x 3.1 x 13.8 inch This is a good plush pillow to show your kids that what is a star and how to shine. Turn on the press on Star and the light will turn off in 15 minutes automatically. The star has multiple colors when lighted. Fill Material Polyester, Color Orange, Size 1 Count (Pack of 1), Brand DearSun, Shape Novelty, Special Feature Protable, Cover Material Polyester, Pattern Star, Age Range (Description) Child, s 3, Dimensions 13.8\ L x 13.8\ W, Care Instructions + +Price is $",30 +"How much does this cost to the nearest dollar? + +Pokemon - Gallade Spirit Link - XY Roaring Skies +In the Pokemon Trading Card Game, players build decks around their favorite Pokemon and then play against each other, sending their Pokemon into battle to prove who the best Pokemon Trainer is. Players can begin with theme decks - pre-constructed decks designed to cover the basics of the game. 
Then, they can augment their card collections with booster packs that provide more cards, letting players develop more diverse decks. With thousands of cards to choose from, the game is never the same twice. Card Name Gallade Spirit Link Card Type Trainer - Item Card Number 83/108 Artist 5ban Graphics Set Roaring Skies Card Text Your turn does not end if the Pokmon this card is attached to becomes M Gallade-EX. A single individual + +Price is $",39 +"How much does this cost to the nearest dollar? + +Ibanez GIO Series Classical Guitar - HH Infinity R - Black Night +Ibanez classical guitars take the guesswork out of finding an affordable, great-sounding classical guitar that's easy to fret and play. Whether you are looking for a traditional classical-sized instrument or a comfortable nylon-string beginner guitar, they are extremely well-constructed, affordable and have the pristine tonality and playability of much more expensive instruments. Ibanez builds guitars for players of all levels—from beginners to the most demanding masters of the instrument. Regardless of price, Ibanez always strives to offer the absolute best sound, style, and playability in its class. The Standard series incorporates all the staples the Ibanez brand is famous for, such as fast necks, floating terms, and high-oct + +Price is $",200 +"How much does this cost to the nearest dollar? + +Set 2 Heavy Duty 12 Ply Skid Steer Tire w/Rim Guard +Deep tread designed to resist gouging and cutting. Brand new, not retreads. Heavy duty 12 Ply rated with Rim Guard to protect your wheels, Durable tread pattern for super stability. 
32.7 oval diameter, 12.3 section width, 23/32 tread depth, max load 6320 lb@80 psi Tire Specifications Tire Size Tire Size Brand SUPERGUIDER Brand SUPERGUIDER Tread Pattern SKS-1 Tread Pattern SKS-1 Ply Rated 12 Ply Rated 12 Tread Depth 0.72 Tread Depth 0.72 Rim Width 9.75 Rim Width 9.75 Max Load Max Load Please note fitment guide is for + +Price is $",400 +"How much does this cost to the nearest dollar? + +Hairpin Table Legs 28 Heavy Duty Hairpin Legs, (Set for 4 ) Heavy Duty Table Legs (Black) +★Hairpin Table Legs Whether you’re a professional carpenter or woodworking is your hobby, our metal furniture legs will give your project the support it needs! ★CREATE A CUSTOM UNIQUE GIFT - Using these hairpin legs to create a custom coffee table, end table, or night stand lets you put together a unique gift that will stand out above the rest. Your gift will be remembered, cherished, and used for years to come. ★Designed for Versatility With a sleek, mid-century modern look, our industrial table legs are ideal for desks, benches and any piece of furniture in between! Finished with the latest in powder coating technology, the legs are uniform and smooth to + +Price is $",50 +"How much does this cost to the nearest dollar? + +Marada Racing Seat with Adjustable Slide for Racing Wheel Simulator Stand Cockpit Adjustable Seat Back Breathable Fabric Black with Installed Parts +Adjustable The adjustment angle of the seat back is 60-135 degrees. By adjusting the handle you can easily adjust to the angle you want.Can be suitable for players of different sizes. Overall Height 34.2, Side Width 21.2, Knee Width 20.6, Seat Back Height 30.7, Shoulder Width 21 Material Cloth, not easy to dirty. The fabric is very breathable and Suitable for sedentary. The product is not easy to deform, protect your spine and cultivate good driving habits. 
Design The seat bottom adopts double lock slide rail design, which is very stable, high matching with our bracket, and easy to install Experience + +Price is $",299 +"How much does this cost to the nearest dollar? + +Remington Industries 24 AWG Gauge Stranded Hook Up Wire, 25 feet Length, White, 0.0201 Diameter, 300 Volts +Hook up wire is used in a variety of general-purpose electrical applications. Stranded copper wire provides good electrical connectivity while PVC insulation protects the wire against abrasion, chemicals, oils, and solvents. The wire conforms to UL and MIL-SPEC specifications, and provides excellent uniformity for easy processing, stripping, and terminating. Available in black, red, white, Blue, green & yellow. Voltage rating 300 volts Type Ul1007 stranded wire (7/32) Insulation pvc (0.016 inch Color white Color White, Brand Remington Industries, Material wire wound, Gauge 24.0, Voltage 300 + +Price is $",99 +"How much does this cost to the nearest dollar? + +Acer Ultrabook, Intel Core 4GB Memory, 320GB HDD and 20GB SSD, Windows 8 +The Acer Aspire S3 Ultrabook is catching lots of attention and now so will you with the Champagne color design. This ultra-thin 13.3 ultrabook is less than 3 lbs light and only 0.5 thin, yet it packs a powerful 2nd Gen Intel Core i3 Processor and is outfitted with Acer Green Instant On and Always Connect for instant response and continuous connectivity. The Acer Aspire S3 Ultrabook all the best new experiences in a ultra-aerodynamic design, transforming your mobile lifestyle! HD widescreen CineCrystal LED-backlit display. Screen Resolution 1366 x 768 Intel Core processor + +Price is $",299 +"How much does this cost to the nearest dollar? + +ICBEAMER 7 RGB LED Headlights Bulb Halo Angel Eye DOT Approved Phone APP Bluetooth Control for Jeep Wrangler +⭐Transform your Jeep Wrangler with ICBEAMER's RGB Multifunction Halo Angle Eye LED Headlights - control brightness, mode selection, and more with the ICBEAMER phone app. 
⭐Upgrade your Jeep Wrangler with ICBEAMER's easy-to-install LED Headlamp Assembly - Plug & Play design, with built-in Canbus and H4/H13 Adapter included. ⭐Experience unbeatable visibility with ICBEAMER's 7 LED Headlight Bulbs - high and low beam output of 3600 LM and 1800 LM respectively, and water-proof IP67 for reliable performance in any weather. ⭐Perfect fit for + +Price is $",100 +"How much does this cost to the nearest dollar? + +R1 Concepts Front Rear Brakes and Rotors Kit |Front Rear Brake Pads| Brake Rotors and Pads| Ceramic Brake Pads and Rotors |fits Lexus IS250 +R1 Concepts Series brake rotors are great for those who want a medium performance upgrade over their factory brakes. Every rotor uses a iron grade of G3000 that provides great stability and braking power. All-in-One Complete Brake Kit Replacement eLine Series Front & Rear Brake Kit comes with (4) high performance brake rotors and (8) low-dust ceramic brake pads. High Performance Brake Rotors Made of G3000 grade cast iron with zinc finish for ultimate rust protection. Built with O.E.M specifications in mind, no modification required. Ultimate Stopping Power Precision-drilled holes and countersunk design + +Price is $",100 +"How much does this cost to the nearest dollar? + +Camplux 2.64 GPM Tankless, Outdoor Portable Gas Water Heater with Overheating Protection, Instant Propane Hot Water Heater for RV, Camping, Cabins, Barns, White +𝐂𝐨𝐦𝐩𝐚𝐜𝐭, 𝐋𝐢𝐠𝐡𝐭 𝐖𝐞𝐢𝐠𝐡𝐭 𝐏𝐨𝐫𝐭𝐚𝐛𝐥𝐞 𝐃𝐞𝐬𝐢𝐠𝐧 - 12.8 inches, lbs. Compact and portable design perfect for barns, cabins, outdoor instant + +Price is $",70 +"How much does this cost to the nearest dollar? + +KNOKLOCK 10 Pack 3.75 Kitchen Cabinet Handles Brushed Satin Nickel Cabinet Pulls Kitchen Cabinet Hardware Drawer Pulls for Dresser Cupboard Wardrobe +Material - The cabinet handles is made of zinc alloy, brushed satin nickel finish, more stable and durable, while making your cabinet more delicate and beautiful. 
Cabinet Pulls Dimensions - Hole Centers(CC) 3.75 Overall Length 4.9 Width 0.60 Projection 0.80 (22mm) Fits Most Cabinets - We offer 1 (25mm) and 1.77 (45mm) mounting screws to help you mount most furniture of different thicknesses, Machine Screws Metric Size M4 Versatile Appicatications - This brushed satin nickel cabinet handles is perfect for dressers, drawers, + +Price is $",60 +"How much does this cost to the nearest dollar? + +Valley Enterprises Yaesu USB FTDI CT-62 CAT Cable Length 10 Feet +Aftermarket Programming Cable Aftermarket Programming Cable FTDI USB Chipset FTDI USB Chipset TX and RX Led indicators TX and RX Led indicators Total Length 10 feet Total Length 10 feet No programming software included No programming software included For use with Yaesu This device requires an FTDI USB VCP Driver. Virtual COM port (VCP) drivers cause the USB device to appear as an additional COM port available to the PC. Application software can access the USB device in the same way as it would access a standard COM port. A link to download the free driver is included. Aftermarket Programming Cable FTDI USB chipset TX and RX Led indicators Total Length 10 Feet No + +Price is $",30 +"How much does this cost to the nearest dollar? + +G9 LED Light 100W replacement halogen bulbs equivalent g9 led bulbs AC110V 120V 130 voltage Bi-Pin Base Corn Base,Daylight White of 4) +Perfect G9 replacement(Daylight White 6000K) This G9 bulb is the same type as traditioanl g9 base replacement, producing confortable light Efficient Each this type of candelabra bulbs provides around 850lm, improving the brightness of your room/home Simple Installation G9 base. Installs into existing G9 base holder Applications Furniture lighting, office lighting, merchandise lighting, display lighting, interior light etc one year, free replacement if any not working during period, please send email to us directly. 
Brightness 102 pcs LED chip.Brightness than general LED G9 ALL + +Price is $",60 +"How much does this cost to the nearest dollar? + +ZCHAOZ 4 Lights Antique White Farmhouse Ceiling Light Fixture Flush Mount Chandelier Ceiling Lamp Modern Sputnik Light Fixtures Hanging for Dining Room Bedroom Living Room Kitchen Entryway Foyer +Light Source & Dimmable White flush mount ceiling light is compatible with various types of 4 x E26 base bulbs(max 60w per blub), options include incandescent, led, halogen, Edison bulb, cfl, etc(Bulbs are Not Included). This hanging light fixtures is dimmable if working with dimmable bulbs and compatible dimmer switch(Not Included Also). Handmade Distressed White ZCHAOZ white ceiling light fixtures ceiling mount is made from high quality iron material in handmade white finish coating with a sturdy cylinder structure design in the center extending + +Price is $",50 +"How much does this cost to the nearest dollar? + +Honeywell Honeywell VisionPro Heat/Cool Digital Thermostat, White +This Honeywell Digital Thermostat is the perfect upgrade to any home. Thermostat has RedLink Wireless Communication, Touch Screen and 7 day programmability. Stages up to 3 Heat / 2 Cool RedLINK wireless communication Precise temperature control (+/- 1° F) for reliable and consistent temperature Package weight of the Product 9.6 Ounces Brand Honeywell, Model Name Controller Type Android, Special Feature Programmable, Color White, Power Source Battery Powered, Weight 9.6 ounces, Voltage 24 Volts, Material Plastic, Shape Rectangular, Display Type 10 sq.in. LCD, Control Type Touch, s 1, Control Method Touch, Mounting Type Wall Mount, + +Price is $",60 +"How much does this cost to the nearest dollar? + +Patriot Exhaust 1-7/8 Clippster Exhaust Header for Big Block Chevrolet 67-81, Silver Ceramic Hi-Temperature Coating +Clippster style headers are perfect for grafting modern uni-body front clip suspensions to street rods, muscle cars and trucks. 
Clippster headers use longer primaries than tight tucks, yet shorter than full length headers. Collectors exit toward the rear of the engine compartment providing excellent ground clearance on slammed applications as well as clearing steering and suspension components. Mid length or clippster headers provide improved ground clearance for popular muscle cars and street rods Durable tubing Comes complete with gaskets, header bolts and collector reducers Available in three finishes Silver Ceramic Hi-Temperature Coating Popular metallic ceramic coating Limited one year warranty Manufacturer Patriot Exhaust, Brand Patriot + +Price is $",70 +"How much does this cost to the nearest dollar? + +Fitrite Autopart New Front Left Driver Side Fender For Nissan Altima, Made Of Steel +Product Name New Front Left Driver Side Fender For Nissan Altima, Made Of Steel Product Name New Front Left Driver Side Fender For Nissan Altima, Made Of Steel Condition New Condition New Warranty 1 Year Warranty 1 Year Fitment Type Vehicle Specific Fitment Type Vehicle Specific Condition New Placement on Vehicle Front, Left Driver Side Warranty 1 Year Fitment Type Vehicle Specific Parts Link No OEM Number Brand Fitrite Autoparts, Exterior Finish Primed, Material Alloy Steel, Dimensions LxWxH 45 x 35 x 11 inches, Weight 8.1 Pounds, Style Modern, Auto Part Position Front Left, Vehicle Service Type Car, Fit Type Vehicle Specific Fit + +Price is $",70 +"How much does this cost to the nearest dollar? 
+ +Technical Precision Replacement for GE General Electric G.E Light Bulb +Replacement For GE GENERAL ELECTRIC G.E Light Bulb Unit per sale 1 Brand Technical Precision, Light Type CFL, Wattage 55.00, Bulb Base G8, Specific Uses For Product Lamp, Light Color Warm White, Unit Count 1 Count, Color Temperature 3000 Kelvin, s 1, Brightness 4000 Lumen, Shape Cd, Size 1 Count (Pack of 1), Connectivity Technology Normal bulb, Controller Type Push Button, Color Rendering Index 82, Manufacturer Technical Precision, Part Weight 7 ounces, Dimensions 11.57 x 9.45 x 1.89 inches, Is Discontinued No, Quantity 1, Rank Industrial & Scientific Compact Fluorescent Bul + +Price is $",50 +"How much does this cost to the nearest dollar? + +Covercraft Carhartt SeatSaver Front Row Custom Fit Seat Cover for Select Ford Models - Duck Weave (Gravel) +Carhartt SeatSaver seat covers from Covercraft are the solution to the problem of keeping the seats in your truck or SUV clean and protected from daily use and weekend adventures. Made from durable, duck-weave fabric, these custom-fit seat covers protect your seats from dirt, mud, grime, spills and more. Featuring Rain Defender technology, a durable water repellency finish is added to the fabric to make it highly water resistant. Combine these features with the custom fitment and classic Carhartt styling, you get seat covers that look great and protect your seats from whatever you throw at them. Classic Duck-Weave Carhartt fabric for durability Custom-made + +Price is $",99 +"How much does this cost to the nearest dollar? + +Sennheiser SD Pro 2 - Double-Sided Multi Connectivity Wireless Headset for Desk Phone & Softphone/PC Connection, Ultra Noise-Cancelling Microphone (Black) +With the SD Pro 2, you get everything you need in an office headset, wrapped in one unique product. The SD Pro 2 is a double-sided, premium wireless DECT headset for desk phone and PC/softphone with base station. 
It features Sennheiser Voice Clarity, ultra noise-cancelling microphone, and ActiveGard hearing protection technology. Choosing the right SD Pro 2 SD PRO 2 This headset is designed for business professionals who communicate with their desk phone and softphone/PC. SD PRO 2 ML This headset is designed for business professionals who communicate in desk phone and + +Price is $",80 +"How much does this cost to the nearest dollar? + +Hitachi Mass Air Flow Sensor +Hitachi’s Air Flow Sensors (MAFs) measure the amount and characteristics of air entering the engine. Hitachi uses precision elements and high-quality components for enhanced durability and accurate air flow measurements. Hitachi MAFs are 100% air flow tested for ideal performance and are calibrated for each application ensuring your vehicle meets the strict emission standards set by the manufacturer. Details such as a contaminant bypass port (when applicable) and protected circuitry providing a durable and reliable product approved by OE manufacturers makes Hitachi’s air flow sensors the premium choice. New orignial equipment part Restores original drivability characteristics Meets the OE performance and durability standards for this application Precision manufactured and assembled sensing elements for accurate air flow measurements Built in contaminant bypass port provides reliable operation + +Price is $",120 +"How much does this cost to the nearest dollar? + +AmScope LED Cordless Stereo Microscope w/Top & Bottom Light Illumination System and 36 specimens +This cordless LED binocular stereo microscope comes with two pairs of stereo objective lenses mounted in a rotating nosecone, sturdy all-metal pillar stand, and a versatile illumination system that provides both incident (top) lighting and transmitted (bottom) lighting. You can choose between incident illumination shining down onto the object or transmitted illumination through the frosted stage plate. 
The first is used for the observation of three-dimensional objects and the second for the observation of slides. It comes with a rechargeable illumination system capable of taking rechargeable AA batteries, and an AC adapter/charger. This microscope offers high resolution and good depth within a broad field of view. It gives sharp clear stereo images. Its 45 + +Price is $",300 +"How much does this cost to the nearest dollar? + +Front Left Driver Side Window Regulator - Compatible with Kia Optima +Front Left Window Regulator - Compatible with 2014 - 2015 Kia OptimaPosition Front LeftNote Includes Module PanelCompatible With or Fits Note w/ USA Built - 2014 - 2015 Kia Optima EX - 2014 - 2015 Kia Optima EX Luxury - 2014 - 2015 Kia Optima LX - 2014 - 2015 Kia Optima SXNote - 2014 - 2015 Kia Optima Limited - 2014 - 2015 Kia Optima SX Turbo - 2014 - 2015 Kia Optima SXL Turbo Includes Module Panel Compatible with or fits (Note w/ USA Built; 2014 - 2015 Kia Opt + +Price is $",45 +"How much does this cost to the nearest dollar? + +Premium Replica Hubcap Set, Fits Nissan Rogue Replacement Wheel Covers +This is a set of 4 Brand New replica Nissan hubcap. Fits 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Rogue. Silver-painted. This is a copy of a factory-original. Our Replica wheel covers are made of sturdy ABS plastic and feature a rich silver finish, just like the originals. They will look great on your vehicle for years to come. Brand New Condition Aftermarket replacement for Nissan part Fits Nissan Rogue 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 model years. Mounts easily and securely to + +Price is $",66 +"How much does this cost to the nearest dollar? 
+ +Excellerations Phonics Spelling Game for Kids and Classrooms Classroom Activity (12 Game Boards) (Item # PSG) +Excellerations Early Language, Phonics Spelling Game, Kids Educational Toy, Ages 3 Years and Up (Item # PSG) CLASSROOM ESSENTIAL This spelling game is perfect for those ages 3 and up to learn about how to sound out words EASY TO USE This activity comes equipped with a helpful activity guide for tricks to teach about phonics and words INTERACTIVE LEARNING This phonics spelling game will get students interacting with their learning while developing spelling and word association skills DURABLE DESIGN This spelling game is made with reusable game boards and durable foam letter tiles so that it can be used again and again ENHANCE CURRICULUM Build on + +Price is $",60 +"How much does this cost to the nearest dollar? + +RC4WD BigDog Dual Axle Scale Car/Truck Trailer Electric Car/Truck Option Parts +Key Features Hand made tube trailer chassis Billet aluminum wheels 1.55 Dirt grabber tires Steel leaf spring Working lift jack Steel tool box for battery and light switch Working lights Easy clip hitch mount Two steel ramps Ramp holders Whitebone inspired design Weight Length 22.63 Width 12.28 Height 5.31 Inside of the Deck Length 15.9in / 404mm Inside of the Deck Width 8.5in / 216mm Tail Plate Length 6.1in / 155mm Tail Plate Width 2.55in / 65mm Overview This is a 1/10 Car or Truck Hauler. Perfect for towing around your rig to + +Price is $",100 +"How much does this cost to the nearest dollar? + +Unknown Stage 2 Clutch Kit - Low Altitude +Amazing all-around performance gains. Easily adjustable for altitude, modifications and more. Superior weight profile offers better acceleration. Goldstar weights adjust easily with magnets. Custom angle cut helix. Comes with two washers to prevent spring bind and free up clutch movement for faster shifts. 
Includes primary and secondary springs.This item fits the following vehicle applications compatible with Polaris 600 Rush PRO-S with Polaris 600 Rush XCR with Polaris 600 SwitchBack Adventure with Polaris 600 SwitchBack Assault 144 with Polaris 600 SwitchBack PRO-S with Polaris 600 SwitchBack SP 144 with Polaris 600 SwitchBack XCR with Polaris 800 Rush PRO-S with Polaris 800 Rush PRO-S LE with Pol + +Price is $",99 +"How much does this cost to the nearest dollar? + +Dodge Ram 1500 Mopar 4X4 Emblem - +BRAND NEW AND MOPAR GENUINE 2007 2008 2009 2010 Dodge Ram 1500 2500 3500 4X4 Logo Emblem Decal Genuine MOPAR Part Number Oe Spec Or Performance/Custom OE Spec, Manufacturer Warranty 2 Year, Modified Item No Manufacturer Part Number Model Ram 1500 2500 3500, Brand Compatible with Mopar Returns Accepted Returns Accepted, Non-Domestic Product No, Make Compatible with Dodge Fitment Type Direct Replacement, Model Year Manufacturer Mopar, Brand Mopar, Weight 1 pounds, Dimensions 6 x 6 x 6 inches, model number Manufacturer Part Position Rear, Available + +Price is $",60 +"How much does this cost to the nearest dollar? + +Pro Comp Alloys Series 89 Wheel with Polished Finish (16x8 +Pro Comp Alloys are designed using State-Of-The-Art Low-Pressure-Casting Technology providing unsurpassed wheel strength, style and value. Pro Comp Alloy Wheels combine head turning style, light weight, durable finish in black, graphite, milled, chrome, polished and dual-tone finishes. Pro Comp Alloys allows for massive brake clearance for todays performance Jeeps, trucks and SUVs. 108 inches Bolt Pattern 6x5.5 inch Back Space 4.5 inch Size 16 inches X 8 inches, Brand Pro Comp Alloys, Wheel Size 16 Inches, Pitch Circle Diameter 139.7 Millimeters, Weight 26 Pounds, Diameter 16 Inches, Vehicle Service + +Price is $",300 +"How much does this cost to the nearest dollar? 
+ +Detroit Axle - Front Rear Strut & Coil Spring Assembly Replacement for Toyota Camry 2.2L Models - 4pc Set +Kit Includes 1x Complete Front Strut & Coil Spring Assembly - Driver Side - 171956 1x Complete Front Strut & Coil Spring Assembly - Driver Side - 171956 1x Complete Front Strut & Coil Spring Assembly - Passenger Side 1x Complete Front Strut & Coil Spring Assembly - Passenger Side 1x Complete Rear Strut & Coil Spring Assembly - Driver Side - 171958 1x Complete Rear Strut & Coil Spring Assembly - Driver Side - 171958 1x Complete Rear Strut & Coil Spring Assembly - Passenger Side - 171957 1x Complete Rear Strut & Coil Spring Assembly + +Price is $",300 +"How much does this cost to the nearest dollar? + +ECCPP Rear Wheel Axle Replacement fit for for Honda Sportrax 2009 +This axle works on the following models 2002 for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for HondaSportrax for for for for Package including 1 piece of Rear Wheel Axle Fitment - for Honda Sportrax 2009 Length - 855 mm, brand new complete rear wheel axle OE quality - Produced in the same specifications and functions as OE. Refer OE number Premium Material - Long service life with high quality raw material and the complete polishing Installation - Replace directly with assembly, easy to install. Brand ECCPP, Weight 15.22 pounds, Dimensions 35 + +Price is $",300 +"How much does this cost to the nearest dollar? + +Dell Latitude E6520 Intel 8GB RAM 500GB HDD Win 10 Pro DVD-RW (Renewed) +Keep up with business wherever you are with the Latitude E6520 laptop. It is ideal for professionals looking for a stable and durable laptop that is light and easy to carry on the go. Specifications Processor Intel Quad Core up to 3.3 GHz Graphics Intel HD Integrated Graphics Memory 8G DDR3 Hard Drive 500G Webcam Webcam Operating System Windows 10 Pro 64 Bit Multi-Language. 
Ports Network connector USB 2.0 (4) – 1 USB/eSATA combo, Stereo headphone/Microphone combo jack, 1394, Docking Connector, VGA, HDMI. Warranty 1 full year Parts and Labor Warranty Included in the box Computer + +Price is $",310 +"How much does this cost to the nearest dollar? + +F FIERCE CYCLE 251pcs Black Universal Motorcycle Fairing Body Bolts Kit Fastener Clips M5 M6 Screws Nuts +Includes hardware for all fairing pieces including, front, mid, lowers, rear, windscreen, and more. Fitment for Honda, for Kawasaki, for Yamaha, for Suzuki This kit is a common size on most motorcycles but it remains the responsibility of the buyer to check the appropriate size fittings are ordered. Simply replace the existing stock fairing bolts with these bolts. Package Includes 5 x Bolt (M6 x 40mm), 20 x x 16mm), 20 x x 20mm), 40 x x 16mm), 8 x x 25mm), 17 x Self Tapping x 12mm + +Price is $",40 +"How much does this cost to the nearest dollar? + +Flash Furniture 4 Pk. HERCULES Series 880 lb. Capacity Black Plastic Stack Chair +When in need of a space-saving seating solution that is either permanent or temporary, stack chairs have been proven to be beneficial. Stack chairs are a popular choice for many businesses that include hotels, schools, restaurants, cafeterias, and offices. This industrial looking chair hits the mark on comfort. This chair features a carrying handle to easily transport. This versatile chair is ideal for both indoor and outdoor functions. With the ability to quickly store the chairs, it allows for the space to be used again for other purposes or when cleaning is needed. This heavy duty plastic stack chair is sturdy in construction to withstand regular use and frequent stacking. To make transporting even easier, equip yourself with the appropriate sized + +Price is $",60 +"How much does this cost to the nearest dollar? 
+ +B&M 30287 Throttle Valve/Kickdown Cable, Silver/Black +This throttle valve / kickdown cable is adjustable, so it can accommodate most TH350 applications. It's handy as a straight replacement for an old OE model, and less expensive. If your project includes a transmission swap, you will appreciate the universal sizing during installation. Adjustable for a universal fit in 95% of all listed transmission applications Eases the installation process for transmission swaps Less expensive than OE models For use with TH350 transmissions Backed by the manufacturer with a 1 year limited warranty Brand B&M, Color Silver/Black, Pieces 1, Special Feature Easy to Install, Included Components Screw, Weight 8 ounces, Unit Count 1.0 Count, s 1, Manufacturer B&M, model + +Price is $",70 +"How much does this cost to the nearest dollar? + +Gates TCK226 PowerGrip Premium Timing Belt Component Kit +Gates is the world's leading manufacturer of timing belts and Timing Component Kits. We designed our kits for virtually every vehicle make and model so technicians can conduct complete system replacements and streamline parts sourcing. As the Original Equipment Manufacturer (OEM) for vehicle manufacturers globally, our Timing Component Kits are OE-equivalent or better in service life, quality, and performance. Total solution for any application TCK includes belts, idler(s), tensioners, tensioner springs, supporting hardware, detailed installation instructions and Technical Service Bulletins for troublesome applications Popular kits covering domestic and import (European and Asian) vehicle applications Designed for convenience, easy parts sourcing and reduced customer comebacks Manufacturer Gates, Brand Gates, Model PowerGrip Premium Timing Belt Component + +Price is $",60 +"How much does this cost to the nearest dollar? 
+ +Monroe Shocks & Struts Quick-Strut 171491 Strut and Coil Spring Assembly +Featuring a vehicle-specific design, Monroe Quick-Strut strut assemblies are fit checked, ride tested and engineered to restore factory ride height and ride performance. Assembled in Paragould, AR, they include all required components in a single unit. QUICKER, SAFER, EASIER AND COMPLETE REPAIR -- Includes everything you need for strut replacement in a single, fully assembled unit with no need for a spring compressor RESTORES RIDE HEIGHT -- Precisely calibrated to meet the OE design, each application-specific coil spring type is engineered to restore ride height and support the vehicle's weight VEHICLE-SPECIFIC DESIGN -- Application-specific coil spring, mount and strut designs ensure optimized ride + +Price is $",70 +"How much does this cost to the nearest dollar? + +Feit Electric 35W EQ DM MR16 LED Light Bulb, 6 Bulbs +This Feit Electric equivalent traditional glass MR16 flood LED light bulb has a GU10 base. Featuring bright white and high 90+ CRI (color rendering index) rating this Enhance LED is our highest quality energy efficient light with bolder color rendering and enhanced contrast so people and objects appear more realistic and vibrant. This MR16 reflector produces a similar light output while using less energy than a standard incandescent light bulb. The dimmable light has an average life of 25000 hours / 22 years and is safe for indoor or outside use. Choose a dependable high quality 120 volt MR16 bulb for residential or commercial applications. Specifications 💡 Color temperature 3000K + +Price is $",70 +"How much does this cost to the nearest dollar? + +Yellow Jacket 2806 Contractor Extension Cord with Lighted End; 100 ft; 100 Ft +Product Description Yellow Jacket 2806 10/3 Heavy-Duty 15-Amp SJTW Contractor Extension Cord with Lighted End, Super flexibility in cold and hot weather. 
Power Lite power indicator lamp glows through the extra heavy, clear molded plug when the cord has power. Three times as abrasion resistant as standard vinyl, making these cords the toughest on the jobsite. Extra heavy, clear molded plugs are rugged, durable and oversized. Meets OSHA specifications, UL Listed. 10 Gauge. The Yellow Jacket (R) brand is a registered trademark of Coleman Cable Inc. From the Manufacturer Yellow Jacket 2806 10/3 Heavy-Duty 15-Amp SJ + +Price is $",99 +"How much does this cost to the nearest dollar? + +Garage-Pro Tailgate SET Compatible with Chevrolet Silverado 1500, Fits 2007 Chevrolet Silverado 1500 Classic, 1500 HD Classic, 2500 HD Classic, 3500 Classic Fleetside/Styleside +Manufactured from high quality materials Manufactured from high quality materials Easy to install; replaces old or damaged part Easy to install; replaces old or damaged part This is an OE replacement item This is an OE replacement item Garage-Pro is the most affordable brand for your old, worn-out, or damaged factory part! This premium quality replacement part is made to give your car, truck, and SUV that original factory look and performance. Available for different applications, our Garage-Pro part will surely fit right to your vehicle. Comes with 1-year unlimited mileage warranty! + +Price is $",80 +"How much does this cost to the nearest dollar? + +3M Perfect It Buffing and Polishing Kit | 36060 06094 06068 3M Rubbing Compound, Machine Polish, Ultrafine Polish | Buffing Compound, Car Polishing Kit | Bundled with Kangaroobands Microfiber Cloth +The 3M Perfect-It Paint Finishing System 3M Perfect-It EX AC Rubbing Compound is the best-performing rubbing compound for removing scratches and surface defects before polishing, even on the latest clear coats. Longer Working Time, Easier Cleanup Even in Extreme Conditions As part of a complete system for creating showroom-grade finishes, it is the ideal compound for the critical pre-polishing stage in collision repair. 
This fast-cutting, fine-finishing compound removes fine grade (P1200 or finer) sand scratches and + +Price is $",40 +"How much does this cost to the nearest dollar? + +Chinese Style Dollhouse Model DIY Miniature Furniture Kit Wooden Tea Shop Dolls House with LED Lights Accessories Hand Craft Puzzle Toy Birthday Gift +Feature This dollhouse makes a great craft project and gift for both friends and collectors! The pictures shows finished project. You receive are spare parts,Mainly through, paste, assembly, modeling, placement DIY craft, complete your lovely beautiful house.Glue and Battery are not included.Detailed pictures instructions. ( Just follow the pictures! )Description Assembly Difficulty Level Time 2-10 hoursFinished Size as picture showsWeight Approximate Include 1 x DollhouseNote 1.The real color of the item may be slightly different from the pictures shown on allow error due to the hand measurement. 3.Due to long shipping, the item may damage in transit, if + +Price is $",40 +"How much does this cost to the nearest dollar? + +Generic NRG Innovations Steering Wheel Short Hub Adapter Kit + LED Keychain Flashlight, black +NRG Innovation has developed another complement to our quick release steering kits. These units were designed specially for an aftermarket steering wheel installed with the quick release kit still mounts in the same location, not too close to the driver. Made from the highest quality aluminum. Our Short Hubs are made to work with our quick release's. This product is designed utilizing one piece solid construction for the maximum in durability and usability. Made of High Quality Aluminum Direct Bolt-on Design, Perfectly fits Any Wheel or Quick Release with a 6-Bolt X 74MM Pattern Anodized for Durability and Strength Racing Style, for Most Aftermarket Racing Brand Steering Wheel Manufacturer NRG Innovations, Brand NRG Innov + +Price is $",50 +"How much does this cost to the nearest dollar? 
+ +Learning Resources Coding Critters Ranger & Zip,22 Piece Set, Ages 4+, Screen-Free Early Coding Toy for Kids, Interactive STEM Coding Pet, Gifts for Boys and Girls,Back to School Gifts +Meet the Coding critters your first coding friends. These playful puppy pets bring early STEM concepts to preschool learning through 100% screen-free coding. Kids code along with their new pets’ storybook adventure, and help the brave Ranger and mischievous zip have a playtime they'll never forget. Each storybook coding challenge unfolds in the Coding critters' Fun pet playset - can you code Ranger to play hide and seek, fetch a ball from the tennis ball launcher, or catch zip after a ride down the slide? In addition to following along with the storybook's coding challenges, + +Price is $",60 +"How much does this cost to the nearest dollar? + +Bosch Automotive 15463 Oxygen Sensor, OE Fitment (Mazda) +Premium Bosch oxygen sensors promise better quality, better overall OE Fit/Form/Function while ensuring better coverage against the competition. Premium Bosch oxygen sensors are designed to improve fuel economy. Vehicles utilizing Bosch premium oxygen sensors experience better engine performance. Premium Bosch oxygen sensors assist in cleaner exhaust emissions. Brand Bosch Automotive, Dimensions LxWxH 2.2 x 1.97 x 5.83 inches, Weight 0.25 Pounds, Style Modern, Mounting Type Flange Mount, Specific Uses For Product Oxygen Sensor, Manufacturer Bosch Automotive, Dimensions 2.2 x 1.97 x 5.83 inches, Country of Origin United Kingdom, model number 15463, Is Discontinued + +Price is $",70 +"How much does this cost to the nearest dollar? + +Case of 24-2 Inch Blue Painters Tape - 60 Yards/roll +Case of 24 rolls of painters tape bulk packed for easy use and access. Each roll is 1.88 inches by 60 yards of masking tape. Professional grade tape is flexible, leaves no sticky residue behind, prevents paint bleed, removes without damaging surface, and gives clean edges. 
Use for every kind of painting, trimming edging, masking. or protecting. Brightly colored tape works well with delicate and bold paint colors. Durable, strong tape sticks to a variety of clean / dry surfaces. Apply pressure when adhering tape for the cleanest lines as adhesive is pressure sensitive and heat activated. Ideal for use in temperatures from 40 to 130 degrees Fahrenheit. Made in the USA. + +Price is $",70 +"How much does this cost to the nearest dollar? + +MOCA Engine Water Pump & Fan Clutch fit 04-07 for Buick Rainier & 02-09 for Chevrolet Trailblazer & 02-09 for GMC Envoy & 02-06 Envoy XL & 04-05 Envoy XUV & 03-07 for Isuzu Ascender 4.2L +Please confirm this item fits for your vehicle before purchasing (Check Fitment Data Above or see description below) Package Includes 1 Water Pump, 1 Fan Thermostat Housing Assembly Part Numbers 33939, All the Components are produced under strictly observed and meet or exceed OEM performance requirements in Manufacturing and Material Local US friendly after-service team to resolve your issues in time, parts have 2 years or 40000 miles warranty Manufacturer OELINE Auto + +Price is $",110 +"How much does this cost to the nearest dollar? + +SAREMAS Foot Step Bars for Hyundai Palisade 2023 Running Boards Side Steps nerf bar Pedal Protector +The price for one pair(left and right running board) Don't drill,use the factory hole Main raw material high quality Aluminum&ABS ect Including brackets and mounting parts For Hyundai Palisade 2020 2021 2022 2023 Manufacturer Donarrw, Brand SAREMAS, Weight 32.3 pounds, Dimensions 81 x 11 x 8 inches, Exterior Aluminum, Manufacturer Part PATXTB, Rank Automotive Running Boards 6648, Available February 18, 2020, Material Aluminum, Acrylonitrile Butadiene Styrene (ABS), Exterior Finish Aluminum, Vehicle Service Type Passenger Car + +Price is $",100 +"How much does this cost to the nearest dollar? 
+ +Gretsch G9210 Square Neck Boxcar Mahogany Resonator Acoustic Guitar +Classic Squareneck Resonator from Gretsch Neck; Padauk Fingerboard; and Hand-spun Cone - Mahogany Natural Acoustic Squareneck Resonator Guitar with Mahogany Top Sides Weight 10 pounds, Dimensions 20 x 7 x 48 inches, model number Rank Musical Instruments 50797, Acoustic Resonator Guitars 12, Is Discontinued No, Available February 5, 2018, Back Material Mahogany, Body Material Mahogany, Color Name Natural, Fretboard Material Padauk, String Material Phosphor Bronze, Top Material Mahogany Wood, Neck Material Type Mahogany, Strings 6, Brand Gretsch, Color Natural + +Price is $",350 +"How much does this cost to the nearest dollar? + +NikoMaku Mirror Dash Cam Front and Rear OEM Design Backup Camera for Cars 4K Resolution Type-C 11 Inch Full Touch Screen Rear View Mirror Camera 170° Wide Angle Dual Cameras Waterproof AS5 Pro +4K Resolution The AS5 Pro mirror dash cam delivers clear video with its 4K front camera and 1080P rear camera. Equipped with 170° wide-angle front lenses, this camera can capture high-quality footage day or night. The mirror dash cam records in real-time and boasts an enhanced imaging system for superior image quality. With its 4K resolution, every detail on the road is vividly displayed. OEM Look Design The supplied bracket allows for a complete replacement of your existing rear-view mirror. Say goodbye to shaky footage while driving, as the bracket effectively + +Price is $",360 +"How much does this cost to the nearest dollar? + +Fenix HP25R v2.0 USB-C Rechargeable Headlamp Bundle with Backup Battery, 1600 Lumen Spotlight, 400 lumens Floodlight and Red Light with LumenTac Organizer +HIGH-PERFORMANCE - The Fenix HP25R v2.0 headlamp emits up to 1600 lumen spotlight reaching 317 yards. You can also switch to a wide-angle floodlight, or an auxiliary red light to preserve the night vision. USB-C RECHARGEABLE - via its built-in charging port. Includes a high capacity battery. 
Runs up to 400 hours on the lowest mode. DESIGN FOR COMFORT -The HP25R v2.0 keeps the battery compartment in the rear to maintain a balanced weight. Also comes with cable clips + +Price is $",200 +"How much does this cost to the nearest dollar? + +R&L Racing Heavy Duty Roll-Up Soft Tonneau Cover Compatible with 94-02 Dodge Ram Regular/Club/Quad Cab 6.5' 78 Bed +R&L Racing Roll Up Tonneau Cover. Get effective bed protection, upgraded appearance, and even improved fuel economy, all at a budget price, with the R&L Racing Roll-Up Tonneau Cover. The vinyl cover will protect your truck bed and contents from the elements, and give your truck a smooth aerodynamic appearance that will even reduce drag for more miles per gallon. It features the quick and easy Clean-Seal closure system, lightweight aluminum rails and bows, and easy no-drill installation. Cargo in an unprotected pickup truck bed can become moisture damaged and corrode from exposure to rain and snow, + +Price is $",350 +"How much does this cost to the nearest dollar? + +Garmin GPSMAP 64sx, Handheld GPS with Altimeter and Compass, Preloaded With TopoActive Maps, Black/Tan +Navigate your next adventure with the GPSMAP 64sx handheld navigator series. Whether you’re hiking, cycling, geocaching or climbing, you are free to explore more with the reliable Garmin handheld navigation in the palm of your hand. And the series now has multi-GNSS support and Topo Active mapping. Rugged and water-resistant design with button operation and a 2. 6” sunlight-readable color display Preloaded with Topo Active maps (U. S. and Australia only) featuring routable roads and trails for cycling and hiking Know where you’re at with a high-sensitivity receiver with quad helix antenna and multi-GNSS support + +Price is $",200 +"How much does this cost to the nearest dollar? 
+ +Brown 5-7/8 X 8-1/2 X 3/16 Thick Heavy Duty Felt Sheets - 12 Pcs +Protect your beautiful laminate, ceramic, vinyl or hardwood flooring as well as your precious furniture, with our Heavy Duty Felt Pads made of 100% polyester felt. These brown protector pads are designed to blend in with dark furniture to compliment your home decor. Simply peel and stick them to lamps, furniture and small appliances to protect tabletops, shelves, desks, floors and countertops. These can also be used to provide a cushioning layer between glass tabletops and pedestals or frames. Or place them on cabinet doors to reduce noise when they're closed. Trim them into the exact shape and size you need, and prevent scratches or damages anywhere + +Price is $",120 +"How much does this cost to the nearest dollar? + +GAOMON PD2200 Pen Display & 20 Pen Nibs 8192 Tilt-Support Full-Laminated Graphics Drawing Monitor Tablet for Digital Drawing/Animation/Online Teaching and Meeting +GAOMON PD2200 PEN DISPLAY + 20 PEN NIBS FOR ONLINE EDUCATION & MEETING You can use PD2200 pen monitor for online education and remote meeting. It works with most online meeting programs, like Zoom, and so on. FOR DIGITAL ART & CREATION -- It's not only for amatuer but also for professionalists for digital drawing, sketching, graphics design, 3D art work, animation, etc. FOR ANNOTATING AND SIGNATURE --It is also broadly used in annotating and signing files WITH AG-FILM PRE-APPLIED + +Price is $",80 +"How much does this cost to the nearest dollar? 
+ +VXMOTOR for 97-03 Ford Lightduty 4WD for 99-03 F150 Lightduty F150 Super Crew Cab/04 F150 Heritage for 97-02 Expedition 4WD for 99-02 Expedition 2WD Matte Black Heavyduty Bull Bar +Application for Ford F150 / F250 Lightduty 4WD ( 4 Wheel Drive ) Models, for Ford F150 Lightduty 2WD ( 2 Wheel Drive ) Models, for Ford F150 Super Crew Cab Models, for 2004 Ford F150 Heritage Models, for Ford Expedition 4WD ( 4 Wheel Drive ) Models, for Ford Expedition 2WD ( 2 Wheel Drive ) Models Front Bumper Bull Bar Guard Heavy Duty Steel With Flat Black Fine + +Price is $",100 +"How much does this cost to the nearest dollar? + +HP EliteBook 2540p Intel Core X2 2GB 160GB DVD+/-RW 12.1'' Wi, Black (Refurbished) +Standing screen display size 12.1 Inches, Processor RAM 2 GB DDR3, Hard Drive 160 GB, Graphics Coprocessor Intel HD Graphics, Chipset Brand Intel, Card Description Integrated, Wireless Type Bluetooth, USB 2.0 Ports 3, Brand HP, Microsoft, Series HP EliteBook, model number Operating System Windows 8 1, Weight 3.97 Pounds, Dimensions 19 x 17 x 5 inches, Rear Webcam Resolution 1 MP, Processors 2, Computer Memory Type DDR3 SDRAM, Flash Memory Size 160 GB, Power Source Battery Powered, Available + +Price is $",199 +"How much does this cost to the nearest dollar? + +Green EPX Mixing Nozzles 3M 50ml Duo-Pak Adhesive Cartridges (Longer 4.5in, 1 1 & 2 1 ratios) +This is a of our Atlas Professional Green Screw-On Mixing Nozzles for the New 3M 1 1 and 2 1 ratio 50ml Duo-Pack Cartridge Design (also called a B-System design with a large gray screw-off cap). These are the longer mixing nozzles, which are preferred for most Urethane and many Epoxy adhesives that require more mixing elements to properly mix. They are also in the high-efficiency quadro style, which reduces wasted material by about 50% vs traditional helix nozzles. They reduce wasted + +Price is $",40 +"How much does this cost to the nearest dollar? + +Box Partners 6 1/4 x 3 1/8 13 Pt. 
Manila Shipping Tags - Pre-Wired +Box Partners G10083 6 1/4 x 3 1/8 13 Pt. Manila Shipping Tags - Pre-Wired 6 1/4 x 3 1/8 13 Pt. Manila Shipping Tags - Pre-Wired Dimensions L x W x H 1.5 x 1.5 x 1.5 inches, Weight 1 Pounds, Dimensions LxWxH 1 x 1 x 1 inches, Weight 1 Pounds, Brand Name Aviditi, Model Name Color Manila, Material Blend, Suggested Users unisex-adult, s 1, Manufacturer BOX Partners LLC + +Price is $",10 +"How much does this cost to the nearest dollar? + +Vixen Air 1/2 NPT Air Ride Suspension High Flow Electric Air Valves/Solenoids 250 PSI Four Corners with Fittings and Hoses +These eight powerful ½ NPT air valves with exceptional high flow deliver unparalleled performance for 12V vehicles. The unique design provides high pressure control at minimal power consumption. Valves support pressures of up to 250 PSI and are constructed with high quality brass to ensure continuous use through extreme conditions. Solenoid's DIN connector is water and dust resistant, a metal mounting bracket is included for each valve, and the air flow direction is clearly marked with an arrow to provide an easy trouble-free installation. Premium brass fittings, flow control valves, pressure switch, drain valve, hoses and cutter are included in this kit. ½ + +Price is $",80 +"How much does this cost to the nearest dollar? + +Smart Floor Lamp, Multicolors Scene DIY Torch Floor Lamp, 24W 2400LM Dimmable Tall Standing Lamp work with Alexa Google Home,Wifi Remote Control RGB Floor Lamp For Living Room +Smart Control💡Control this smart floor lamp using the Smart Life app or your voice with Amazon Alexa or Google Home. You can group multiple lamps together and control them individually or together. Choose from 16 million colors and 12 scenes to create the perfect lighting for any occasion. Note The lamp only works with 2.4GHz Wi-Fi networks. Adjustable Lighting💡 This floor lamp features a range of white color temperatures from 2700K to 6500K and single color RGBWW options. 
The lamp is also dimmable and uses high-quality LED chips with a C + +Price is $",99 +"How much does this cost to the nearest dollar? + +SOZG 324mm Wheelbase Body Shell RC Car Body Shell Super Hard Plastic Black with Screw for RC Vehicle, SOZGpuFdVe +Specification Item Type Body ShellProduct Material Rigid plasticWeight Approx. 1190g / BlackWheelbase Size For 1/10 RC for, for Axial List 1 Set x Body Shell (81 Bags x ScrewNote 1. Manual measurement, please allow 1‑3mm error, thank you!2. Due to the difference between different monitors, the picture may not reflect the actual color of the This car shell is suitable for 324mm wheelbase chassis, if it is installed on other chassis, the wheelbase needs to be adjusted to Shipped in bulk, assembled by the customer (the door cannot be opened). + +Price is $",30 +"How much does this cost to the nearest dollar? + +Mickey Thompson ET Street S/S Racing Radial Tire - +A D.O.T. approved street tire which provides excellent traction at the strip. Proven polyester-ply, steel belted, tubeless radial construction provides strength & durability for excellent ride control on the street Proven R2 compound provides quick and consistent traction at the STRIP with little burnout required Minimal tread void for excellent dry traction, strategically placed to aid in hydroplane resistance 18 popular sizes for 15- to wheel diameters DO NOT USE ON DYNO Brand Mickey Thompson, Seasons Year Round, Size Section Width 275 Millimeters, Ply Rating Polyester, Tire Diameter 25.9 inches, Weight 30.95 Pounds, Manufacturer Mickey Thompson, Model ET Street S/S, model number Is Discontinued No, Manufacturer + +Price is $",300 +"How much does this cost to the nearest dollar? 
+ +Pirelli 106W XL RFT P0 +Product Type Vehicle Tire Package Dimensions 10.9 L X29.0 W X29.0 H Country Of Origin Mexico Package Weight Fit type Universal Fit Brand Pirelli, Seasons Year Round, Size Section Width 275 Millimeters, Load Capacity 2094 Pounds, Tread Depth 9 32nds, Tread Type Asymmetrical, Ply Rating XL, Tire Diameter 28.66, Weight 36 pounds, Manufacturer PIRELLI, Model P Zero PZ4 Run Flat, model number Is Discontinued No, Manufacturer Part OEM Part Special Features Run_flat, Construction Radial, UTQG Rank Automotive Passenger Car Performance Tires 722, Available August 5, 2017, Rim Size + +Price is $",350 +"How much does this cost to the nearest dollar? + +Torklift C3212 Rear Tie Down +Fits 11-14 Chevy/GMC 2500 / 3500 HD (Crew / Ext. Cab ONLY) with factory hitch 11-14 Chevy/GMC 2500 / 3500 HD (Crew / Ext. Cab ONLY) with factory hitch 11-13 Chevy/GMC 2500 / 3500 (Regular Cab ONLY) with factory hitch 2014 Chevy/GMC 1500 4wd (Crewcab) with factory hitch 2014 Chevy/GMC 2500 4wd (Crewcab) with factory hitch 2014 Chevy/GMC 2500 4wd (Regular Cab) with factory hitch 2014 Chevy/GMC 3500 4wd ( + +Price is $",200 +"How much does this cost to the nearest dollar? + +Cardone Remanufactured Ford Computer +CARDONE Remanufactured Electronic and Powertrain Control Modules are designed to meet or exceed O.E. performance. Reverse engineering provides insight into how and why the unit originally failed, allowing our engineers to identify and correct original design weaknesses. All critical components are re-soldered or replaced at our Philadelphia manufacturing plant, and each unit is 100% computer tested to ensure reliability. CARDONE is committed to getting your vehicle back to peak performance. 
On-car vehicle validation testing ensures product fits and functions properly OE components with high failure rates are 100% replaced All electronic modules are 100% tested to ensure they meet OE requirements for the application Advanced robotic equipment ensures precision made units and consistent high quality with every part Every unit is 100% tested to ensure + +Price is $",199 +"How much does this cost to the nearest dollar? + +Kidde AccessPoint 001798 Supra TouchPoint Lock +From the Manufacturer TouchPoint lock is designed to replace a standard cam lock in a variety of metal storage cabinets or enclosures sized with 5/16 inch square-hole cams. Solid die-cast body with a 10 digit changeable combination and a clutch mechanism to turn the cam. Can be mounted on top of the door surface or flush-mounted into the door. User changeable combination lock with push button combination is designed to replace a standard cam lock Door lock can easily change keyed cabinets to pushbutton locks; for use with items with 5/16 inch cams Combination lock features heavy-duty die-cast construction; great for metal cabinets and other enclosures Mounts flush into a door, or on top of a door surface; clutch + +Price is $",99 +"How much does this cost to the nearest dollar? + +3M Protecta Self Retracting Lifeline Rebel 6' (18M) Web Twin, Steel Rebar and Carabiner, Black/Red +Our Protecta personal self retracting lifelines (SRL’s) represent a major improvement in economy line SRL’s. Employers can economically replace simple lanyards with the versatility and added safety of a 6 ft. (1.8m) SRL. Protecta personal SRL’s are ergonomically designed for ease of use and are ideal for direct connection to most harnesses. The compact and lightweight design is barely noticeable on your back and stays out of the worker’s way. In addition, tension is always kept on the lifeline, which reduces dragging, snagging and trip falls. 
Whether your application requires + +Price is $",20 +"How much does this cost to the nearest dollar? + +Plantronics Wired Headset, Black, 7 x 5.4 x 2.2 inches +The next generation of our most popular over-the-head monaural headset. Completely re-imagined for the demands of the modern customer service center and office. Features soft ear cushions for all-day wearing comfort, metal joints that deliver durability and reliability and a flexible mic with visual and tactile positioning guides for precise positioning and clearer conversations. Frequency response - up to 6,800 Hz Dimensions 7 x 5.4 x 2.2 inches, Weight 4.8 Ounces, Manufacturer PLANTRONICS, INC., model number Rank Computer Headsets 370, Is Discontinued No, Available October 30, 2014, Units 1.0 Count + +Price is $",40 +"How much does this cost to the nearest dollar? + +Logitech K750 Wireless Solar Keyboard for Windows, 2.4GHz Wireless with USB Unifying Receiver, Ultra-Thin, Compatible with PC, Laptop - Black +Product Description Battery hassles are a thing of the past with the solar-powered Logitech Wireless Solar Keyboard K750. It charges itself whenever there's light, so you can say good-bye to batteries, power bricks and charging cables. With sleek lines and a thin profile, this stylish, streamlined keyboard adds style to your workspace. Combining the best of traditional keyboards, laptops and a Logitech-only concave key cap design, you'll enjoy faster, quieter, feel-good typing-hour after hour. Plus, you'll get Logitech Advanced 2.4 GHz wireless and the tiny Logitech Unifying receiver. From the Manufacturer + +Price is $",85 +"How much does this cost to the nearest dollar? + +Olympus PEN E-PL9 Body Only with 3-Inch LCD (Pearl White) +Introducing the PEN E PL9. It has everything to produce images you’ll be proud to share. There’s nothing to learn; just pick it up and let the on screen guides and built in settings make every shot perfect. 
Thanks to the powerful image stabilization system, you’ll easily shoot blur free stills and smooth 4K video, all handheld. With features like flip touchscreen, built in flash, Wi Fi and Bluetooth for easy sharing make the E PL9 your go to camera. 16 Megapixel live MOS sensor TruePic VIII Image Processor 3 180 Degree Flip down touch screen In body 3 axis image stabilization 4K video & still image capture from 4K + +Price is $",450 +"How much does this cost to the nearest dollar? + +Beck/Arnley Hub & Bearing Assembly +Since 1914, Beck/Arnley has focused on the customer, offering high quality parts that look and perform the same as the original part. This ideal has never changed. Today, Beck/Arnley is committed to being the premium supplier of high quality import parts within the automotive market. BeckArnley is an original equipment brand that partners with other manufacturers to supply the parts that cars were originally built with. This product is in a BeckArnley package, note that the part may have been manufactured by an independent BeckArnley supplier and the number on the part may differ from the number on the package. Quality construction Excellent materials Exacting tolerances Manufacturer Beck/Arnley, Brand Beck/Arnley, Weight 6.37 Pounds + +Price is $",120 +"How much does this cost to the nearest dollar? + +Eibach Pro-Kit Performance Springs Set Of 4 Compatible with Nissan Altima +Eibach production technology is recognized worldwide as leading its field, from our high-strength spring-steel alloys, our advanced CNC winding process, our high-quality corrosion protection and the legendary longevity of our components. High Performance Handling and Aggressive Good Looks. Each Spring Individually Tested Stop Quicker, Corner Faster and get Better MPG! Progressive Spring Design for Excellent Ride Quality. 
Manufacturer Eibach, Brand Eibach, Model Weight 24.8 pounds, Dimensions 24.7 x 14.7 x 7 inches, model number Exterior Machined, Manufacturer Part Rank Automotive Automotive Replacement Shocks 12221, Available December 9, 2019 + +Price is $",60 +"How much does this cost to the nearest dollar? + +LEGO DC Batman 1989 Batwing 76161 Displayable Model with a Buildable Vehicle and Collectible Figures Batman, The Joker – Mime Version and Lawrence The Boombox Goon, New 2021 (2,363 Pieces) +This is no kid’s toy. If you’re serious about BATMAN, comic book super heroes or making cool models, this LEGO DC BATMAN 1989 Batwing is for you! Recreate the authentic detail and gothic elegance of BATMAN’s iconic aircraft, the Batwing, with this LEGO brick build-and-display model. The impressive reproduction features realistic details, removable canopy, full interior, poseable flaps and a new special brick that will allow you to mount and display your model on your wall. There’s also a stand, nameplate + +Price is $",29 +"How much does this cost to the nearest dollar? + +Kingston Brass Restoration 4-Inch Centerset Lavatory Faucet with Porcelain Lever Handle, Brushed Nickel +Product Description Classic style. Two handle deck mount. 4 in. center set. Max 1.2 LPM water flow rate at 60 PSI. Integrated removable aerator. Drip-free ceramic cartridge system. Three hole sink application. 4.05 in. spout reach. 3 in. spout height. 4 in. center spread installation. 1/4 turn on and off water control mechanism. 1.05 in. spout clearance. Made from brass. Satin nickel finish. Made in Taiwan. From the Manufacturer Functional and Stylish Faucets Gives an Irresistible Beauty to the Bathroom. Design is Perfectly Co- + +Price is $",35 +"How much does this cost to the nearest dollar? 
+ +Polk Vanishing Series 265-LS In-Wall 3-Way Loudspeaker, Dual 6.5 Dynamic Balance Drivers & 1 Ring-Radiator Tweeter, Polk PowerPort Technology, Rotating Cam System for Easy Installation +Enjoy extraordinary audio performance for your movies, music and TV shows with the Polk Vanishing Series 265-LS 3-Way Loudspeaker that disappears into your wall and yet delivers impactful, room filling sound. The in-wall speaker is equipped with dual 6.5 Dynamic Balance Drivers for clear, accurate mids and dynamic lows, and a 1 Ring-Radiator Tweeter for incredible imaging. With Polk's Patented PowerPort Bass Technology, the speaker adds deep, rumbling bass to your audio, while minimizing unwanted resonances. + +Price is $",200 +"How much does this cost to the nearest dollar? + +Spec-D Tuning LED Projector Headlights Glossy Black Housing Smoke Lens Compatible with Subaru Impreza Outback Sport, Subaru Impreza WRX Left + Right Pair Headlamps Assembly +✔️ All of Our Items are 100% Brand New In Original Packaging! You Will Never Receive a Used Item From Us! Comes in a Pair (Driver Side Left & Passenger Side Right Included) ✔️ DOT and SAE Compliant. Made by an ISO Certified Manufacturer using Materials that meet or Exceed OEM Requirements! ✔️ Direct Bolt On Replacement From Your Original Headlights! No Wiring or Modifications Needed! No Installation Instructions Included, Professional Installation is Highly Recommended! ✔️ Products Undergo Strict Quality Control to Ensure it is Waterproof (fully sealed with solid silicon) & Impact/UV Resistant + +Price is $",300 +"How much does this cost to the nearest dollar? 
+ +RICHMOND & FINCH Airpod Pro Case, Green Leopard Full Protective Cover, Shockproof, Scratch Resistant, Wireless Charging Compatible Case for Airpods Pro +COMPATIBILITY This Richmond & Finch Airpod Pro Case is compatible with Airpods Pro Only PROTECTION Our Richmond & Finch Airpods Pro Case offers premium protection to your air pods pro with our shockproof protective cover, protecting your Airpod Pro from drops and knocks WIRELESS CHARGING The Richmond & Finch Airpods Pro Case is wireless charging compatible, so you can charge your Airpod Pros easily and quickly SCRATCH RESISTANT Our Richmond & Finch Airpod Pro Protective Cover is made from high quality scratch resistant materials, ensuring your Air Pods Pro are safe from any scratches or damage FASHION FORWARD All + +Price is $",30 +"How much does this cost to the nearest dollar? + +LFA Industries - mm Capacity, 33 Jacobs Taper Mount Plain Bearing Precision Crafted Heavy Duty All Steel, Keyed Drill Chuck with T5/k32 Chuck Key Included +LFA Industries Plain Bearing Precision Crafted Heavy Duty All Steel, Keyed Drill Chuck with T5/k32 Chuck Key Included, mm Capacity, 33 Jacobs Taper Mount. LFA Industries Plain Bearing Precision Crafted Heavy Duty Keyed Drill Chuck All Steel, Keyed Drill Chuck with Key Included LFA Industries mm Capacity, 33 Jacobs Taper Mount Manufactured To Last-For Quality and Excellence-Chucknology Made in France over 85 Years Manufacturer LFA Industries, Part Weight 2 pounds, Dimensions 2.88 x 1.67 x 2.88 inches, Country of Origin France + +Price is $",40 +"How much does this cost to the nearest dollar? 
+ +SAUTVS LED Headlight Assembly for Slingshot, Center Head Light Kit for Polaris Slingshot S GT R LE SL Modified Accessories, Replace OEM +Compatible with Polaris Slingshot S SL SLR R LE (Please refer to the compatible list in description) Plug & Play, perfect and accurate replacement for the original headlight without any changing or modifying, replace OEM The design of internal protection mechanism makes it no flickering or failure; IP67 waterproof and scratch resistant materials prevent from dust, mud, snow or heavy rain leaking in; Strictly follow the quality and safety standards, working in all the weather conditions High quality LED beads are used, long service time and life span; Brighter and concentrated light source, ensuring your driving safety Package include 1 set LED headlight assembly + +Price is $",100 +"How much does this cost to the nearest dollar? + +2 Pack Combo Womens Safety Glasses Impact Resistant Clear Smoke Lens +Package Includes 2 pairs of Womens Safety Glasses with Clear Lenses and Black Sunglasses Lens Assorted Color Temple Frames Available! Sizing Information Frame length – 6.25 in, Frame Width 5.4 in. Exceeds ANSI Z87.1+ Safety Standards. Shatter Proof Protection Our lenses offer 100% protection against glare and protection against UV/UVA/UVB rays. The Safety Glasses are also scratch-resistant, impact-resistant, and shatter proof. Keep your eyes safe during construction, metalworking, welding, woodworking, hunting, fishing, sports, shooting, and other activities outdoors. Impact Resistant Coating coating Package Includes 2 pairs of Womens Safety Glasses with Clear Lenses and Black Sunglasses + +Price is $",40 +"How much does this cost to the nearest dollar? + +Arepa - Venezuelan cuisine - Venezuela PopSockets PopGrip Swappable Grip for Phones & Tablets +Arepa Venezolana. Arepa - Venezuelan cuisine - Venezuela. Arepa - Venezuelan cuisine - Venezuela. Great gift for holidays, birthdays, events, parties and much more. 
Arepa - Venezuelan cuisine - Venezuela Great gift for holidays, birthdays, events, parties and much more. PopGrip with swappable top; switch out your PopTop for another design or remove it completely for wireless charging capabilities. (Not compatible with Apple MagSafe wireless charger or MagSafe wallet.) Expandable stand to watch videos, take group photos, FaceTime, and Skype handsfree. Advanced adhesive allows you to remove and reposition on most devices and cases. Note Will not stick to some silicone, waterproof + +Price is $",19 +"How much does this cost to the nearest dollar? + +Schlage Lock Company Padlock, 1-1/2 x 5/16, Brass +Schlage Commercial Padlock 5/16 Diameter with 1-1/2 Shackle and Keyway Schlage commercial grade padlock is designed for use in high risk locations Solid brass body resists corrosion for all-weather performance 1-1/2 in. x 5/16 in. molybdenum hardened steel shackle for increased cut resistance Double deadbolt locking mechanism provides extra security Re-key able Schlage cylinder Brand SCHLAGE, Special Feature Keyway, Lock Type Key Lock, Dimensions LxWxH 0.4 x 1.5 x 2.5 inches, Material Brass, Steel, Recommended Uses For Product Security, Color Brass + +Price is $",40 +"How much does this cost to the nearest dollar? + +Techni Mobili White Sit to Stand Mobile Laptop Computer Stand with Height Adjustable and Tiltable Tabletop +Techni Mobili Sit-to-Stand Rolling Laptop Stand offers an adjustable height mechanism that is compact, portable and is a perfect choice for a laptop or writing setup in a limited space. This Sit-to-Stand mobile laptop stand features a large tabletop with a tilt mechanism attached so it can be adjusted to your most comfortable working angle. It also features a safety edge-stopper to prevent objects from sliding down when tilted. The heavy-duty steel frame supports a sturdy structure, and the non-marking locking casters let you glide while maintaining the balanced level. 
𝐒𝐔𝐑𝐅𝐀𝐂𝐄 𝐌𝐀� + +Price is $",40 +"How much does this cost to the nearest dollar? + +Special Lite Products Contemporary Wall Mounted Mailbox with Rain Overhang Finish Oil Rubbed Bronze +The clean lines and minimal design of the Contemporary Horizontal Mailbox provide an immediate way to add a lovely and welcoming outdoor accent to your front porch. The straightforward design makes this mailbox a perfect match with any home while upgrading your entry way at the same time. The durable powder coat finish will keep your mailbox looking vibrant and beautiful for years to come while the door closure will protect your mail from rainy weather keeping it dry inside. All types and sizes of magazines, letters, envelopes can fit easily inside the enclosure. Matching newspaper scroll arms are included and can be easily attached at your choosing. Deliberate but stylish, the Contemporary delivers at all angles. One of our best sellers! All screws, hinges, and like + +Price is $",110 +"How much does this cost to the nearest dollar? + +Tascam Digital Portastudio Multi-Track Audio Recorder & Tascam RC3F 3-Way Footswitch +Tascam Digital Portastudio Multi-Track Audio RecorderTascam Digital Portastudio Multi-Track Audio RecordeTascam RC3F 3-Way FootswitchThe RC-3F is a 3-way footswitch for the GB-10, LR-10, DP-03 and other TASCAM recorders and players. The 1/8 mini jack plugs into the remote jack of these TASCAM products to add features like play/pause, looping or punch in. See the products' user manual for details. Product 1 Eighteen track faders and one master fader allows instant access to any track without selecting pages + +Price is $",399 +"How much does this cost to the nearest dollar? + +Glow Lighting Vista Crystal Flush Mount, 6 W +Create that beach feeling with this capiz shell and chrome pendant chandelier. Ideal for bedrooms, kitchens, dining rooms and bathrooms. 
Uses 3 x 40 Candelabra base bulbs Trimmed with clear crystal Easy installation hardware, instructions included for convenient setup CSA/CUS approved for dry location Manufacturer Glow Crystal Lighting Inc., Part Weight 2.69 pounds, Dimensions 10 x 10 x 8.5 inches, Country of Origin Canada, model number Is Discontinued No, Size 6\ W, Color 8.5, Power Source Corded Electric, Voltage 120 Volts, Quantity 1, Type of Bulb incandescent, Mounting Type Ceiling Mount, Plug Format A- US style, Certification CSA + +Price is $",166 +"How much does this cost to the nearest dollar? + +Z3 Wind Deflector, Smoke Tint, Lexan, Windscreen, Windstop, Windblocker +- Easy installation, installs in less than two minutes. - Take long trips with the top-down in comfort. - Cruise at night without freezing from cold drafts. - Hear the full richness and clarity of your stereo. Reduce turbulence up to 70%; prevents unrelenting wind buffeting and driver fatigue Unique no reflection or glare, easy to use at night against headlights; Unlike others, no abrasion, does not induce long term wear 30 day trial period and lifetime warranty; No rattles or squeaks, is silent; Keep hair in place while driving with the top-down. Talk clearly on your blue tooth device; Talk with passengers without strain; Enjoy conversations while driving with the + +Price is $",99 +"How much does this cost to the nearest dollar? + +Olympus E-20 5MP Digital Camera w/ 4x Optical Zoom +Product description 5.2 megapixel sensor creates 2,560 x 1,920 images for prints at 11 x 14 and beyond 4x optical zoom lens with autofocus Included 32 MB SmartMedia card holds 7 images at default resolution Compatible with SmartMedia and Type I and II CompactFlash Uses Amazon.com You'd be hard-pressed to find a digital camera that captures better images than those from the Olympus E-20N. The camera pairs a sensor with a high-quality custom-designed 4x zoom lens for photos with clarity that rivals film. 
First, a note about naming conventions this camera is also known as the E-20 and the E-20P. The N + +Price is $",220 +"How much does this cost to the nearest dollar? + +PHYNEDI 1 1000 World Trade Center Bricks Model Compatible with Lego, MOC DIY Creative Large Architecture Collection Challenge Building Toy, (4,870 Pieces) +The building instructions of this model are two PDF guides (Part 1 has 135 pages, Part 2 has 155 pages),. Part 1 also includes a four page introduction about the World Trade Center history and design. World Trade Center features Scale 1 in inches 13,2 x 10,7 (base area), 22,7 (height)Size in centimeters 33,6 x 27,2 (base area), 57,5 (height)Size in studs 42 x 34 (base area), 71,9 (height)Style ArchitectureYear 2022 Package + +Price is $",50 +"How much does this cost to the nearest dollar? + +YANGHUAN Unstable Unicorns Adventure Card Game Toy Expansion Pack-Teen Board Game-Adult Strategy is Designed to add to The Base Unstable Unicorn Solitaire Expansion Pack +Product Description Product Name Card Game Single piece size 15 x 10.5 x 5cm Single piece weight 350g Expansion package parameters Single piece size Single product weight 105 grams Material coated paper Color Unstable Unicorns white frame, Unstable Unicorns black frame, NSFW extension, Legenda extension, Rainbow extension, Dragons extension, Uncut extension Ability training emotion, intellectual development, brain use, other ability training, interactive toys, parent-child communication, interest development Suitable age 14 years old and above Game type Unstable Unicorns is still a strategy game, it will destroy your + +Price is $",40 +"How much does this cost to the nearest dollar? 
+ +Interlogix NetworX Touch Screen Keypad, 3.5 Color Touch Screen, Icon-based Graphic Interface, Built-in Message Board, NetworX System Compatibility, Capability, Modern Design +Interlogix NetworX Touch Screen Keypad, 3.5 Color Touch Screen, Icon-based Graphic Interface, Built-in Message Board, NetworX System Compatibility, Capability, Modern Design Ideal for almost any size application, the NetworX Touch Screen Keypad offers powerful yet simple control of any NetworX security system. An intuitive interface, 3.5 touch screen and Quick Keys for rapid system arming and status updates enable quick and easy system management Users can record their own names for different system components and leave voice messages for others when arming or disarming. When a Net + +Price is $",110 +"How much does this cost to the nearest dollar? + +Steering Damper,Universal Motorcycle Handlebar Aluminum Alloy Steering Damper Stabilizer Safety Control(Gold) +Features 1. Durable in Use Made of durable aluminum alloy for extreme strength. 2. Excellent Quality Professional manufacturing, high precision and good quality. 3. Easy and Simple to Hand Easy installation without any modification required. 4. Stable Quality The anodized surface for enhance its oxidizing and corrosion resistance. 5. Scope of Application Universal for motorcycle, high-emissions car, sports car, street car. Specification Condition 100% Brand New Material Aluminum alloy (CNC) Color Black/Gold/Red/Silver/Blue(optional) Mounting screw Fitment Universal for motorcycle, high-emissions car, sports car, street car. Package List 1 * Dam + +Price is $",120 +"How much does this cost to the nearest dollar? + +Amprobe TIC 410A Hot Stick Attachment +Amprobe products range from an extensive line of clamp meters and digital multimeters to industry-specific tools for residential/commercial electricians, HVAC/R technicians, utilities and industrial maintenance professionals. 
All Amprobe tools undergo rigorous testing to ensure full compliance with the latest IEC and CE safety regulations in Fluke Safety labs for quality and safety you can trust. Extension probe attaches to Amprobe TIC 300 Pro AC voltage detector to test for high AC voltages without touching or disconnecting the circuit Can detect AC voltages between 1,500V and For utility, industrial, and mining applications when working with high-voltage equipment such as transmission lines, downed power lines, fuses, and load-break connectors Extends to 57 long Conforms + +Price is $",100 +"How much does this cost to the nearest dollar? + +MyCableMart 3.5mm Plug/Jack, 4 Conductor TRRS, Self Solder, Male +Connects stereo audio & microphone devices requiring 4 conductors (left and right audio and microphone plus ground). This connector MAY also be suitable for left/right audio 1 video (composite) and ground. Great for making your own 3.5mm 4 conductor Cables or for repairing existing cables. Wire terminals are attached using solder (not included).Features 3.5mm 4 conductor (3 band) plug 3.5mm 4 conductor (3 band) plug Nickel Plated Nickel Plated Strain relief Strain relief Outer Dimensions (at PVC outer molding) Outer Dimensions (at PVC outer molding) Outer Dimensions (with PVC outer molding + +Price is $",25 +"How much does this cost to the nearest dollar? + +OtterBox + Pop Symmetry Series Case for iPhone 11 Pro (ONLY) - Retail Packaging - White Marble +OtterBox + Pop Symmetry Series Case for iPhone 11 Pro (ONLY) - Retail Packaging - White Marble Compatible with iPhone 11 Pro Thin one-piece case with durable protection against drops, bumps and fumbles that is also compatible with Qi wireless charging PopSockets PopGrip is integrated into case to help with holding, texting, snapping better pictures and hand-free viewing PopTop designs are easy to switch out — just close flat, press down and turn to swap the PopTop. 
Includes OtterBox limited lifetime warranty (see website for details) and 100% authentic Dimensions 7.8 x 4.29 x 1.06 inches, Weight 3 + +Price is $",20 +"How much does this cost to the nearest dollar? + +Dell XPS Desktop ( Intel Core i7 4790 (3.6 GHz), 8GB, 1TB HDD,Windows 10 Home Black +Product description Bring your multimedia to life with Dell XPS desktop PCs offering powerful processors, superb graphics performance and lots of storage space. Amazon.com Processor 4th Generation Intel Core processor (8M Cache, up to 4.00 GHz) OS Windows 7 Professional, English Graphics Card NVIDIA GeForce GTX 750Ti 2GB DDR5 Memory 32GB Dual Channel DDR3 - 4 DIMMs Hard Drive 1TB 7200 RPM SATA Hard Drive 6.0 Gb/s + 256GB SSD Processor 3.6 GHz RAM 8 GB DDR5, Memory Speed 1600 MHz, + +Price is $",500 +"How much does this cost to the nearest dollar? + +Franklin Iron Works Sperry Industrial Bronze Chandelier 28 Wide Rustic Farmhouse Cylinder Scavo Glass Fixture for Dining Room House Foyer Kitchen Island Entryway Bedroom Living Room +28 wide x 28 high. Glass is 6 1/4 high x 3 wide. Canopy is 5 1/2 wide. Weighs 19.58 lbs. Comes with of lead wire and 6-feet of chain. Sloped ceiling adaptable. Uses eight maximum 60 watt standard-medium base bulbs (not included). Contemporary farmhouse eight-light chandelier from Franklin Iron Works. Industrial bronze finish metal frame. Scavo glass cylinder shades. Brand Franklin Iron Works, Color Scovo Glass, Material Glass, Style Farmhouse, Light fixture form Chandelier, Room Type Entryway, + +Price is $",400 +"How much does this cost to the nearest dollar? + +Avery Legal Dividers, Standard Collated Sets, Letter Size, Side Tabs, 51-75 +You have the right to organized and professional-looking files. This Standard Collated Legal Divider Set features Tabs 51-75 so it's perfect for index briefs, legal exhibits, mortgage documentation files and more. 
White paper stock with clear, Rip Proof reinforced tabs are preprinted on both sides using Helvetica bold type for ease of use, and the unpunched binding edge gives you the freedom to fit practically any binding system. Here's evidence these dividers will do your files justice. Clear Rip Proof reinforced tabs printed on both sides Unpatched binding edge so indexes can fit practically any binding system Contains 30% post-consumer recycled content Avery Style is printed using Helvetica bold type Manufacturer Avery, + +Price is $",20 +"How much does this cost to the nearest dollar? + +Moen 8346 Commercial Posi-Temp Pressure Balancing 4 Port Cycling Valve Hand Shower System 2.5 gpm, Chrome +Product Description VERSATILE DESIGN Chrome finish is highly reflective for a mirror-like look that works with any decorating style From the Manufacturer This single-handle handheld shower system has a Posi-Temp pressure-balancing valve that maintains water pressure and controls temperature, a slide bar, drop ell, vacuum breaker, a metal hose and mounting hardware. The pressure balancing cycle valve design has 1/4 turn stops, the rubber nozzles are quick cleaning and the chrome plated metal construction provides a bright, highly reflective, cool grey metallic look. The temperature handle operates counterclockwise through a 270 degree arc, with off at 6 o' clock + +Price is $",200 +"How much does this cost to the nearest dollar? + +Carlisle Versa Trail ATR All Terrain Radial Tire - NHS +Tire designed to provide high performance for sports driving. Providing maximum traction with good braking control and handling, the tire offers unrivaled comfort when driving at high speeds. Comfort, experience, technology and design! 
Tire only, Rim not included made in united states package height 8.9 package length 27.1 package width 27.1 Fit type Universal Fit Brand Carlisle, Seasons NON_WINTER, Size Rim Size 12 Inches, Section Width 9 Inches, Tire Aspect Ratio 8, Speed Rating M, Tread Depth 24 32nds, Ply Rating 6-Ply, Tire Diameter 27 Inches, Weight 23.6 pounds, Manufacturer Carlisle, Model Versa Trail ATR + +Price is $",300 +"How much does this cost to the nearest dollar? + +SUNWAYFOTO 44mm Tripod Ball Head Arca Compatible Sunway +66lb Max load! Eliptical Ball for progressive resistance. Y-axis diameter 0.03mm longer than the X-axis. Single notch design. Super strong shell. All Metal knobs. For long term durability. Panning Base Scale Independent Pan Lock Knob, 360° panning movement with calibrated precision, precisely capture overlapping panoramic images. Panning Base is laser-engraved with index marks from with increments at 5° 50mm clamp with Bubble level. Ball Diameter 44mm. Base 55mm. Height 94mm. Weight 450g. Max load 30kg (66 lbs). Bottom thread 3/8 with 1/4 adapter. 1 year + +Price is $",100 +"How much does this cost to the nearest dollar? + +NanoBeam AC 4 Units 5GHz High-Performance airMAX ac Bridge CPE with Dedicated Management Radio +Models Ubiquiti Networks NanoBeam AC 5GHz High-Performance airMAX ac Bridge CPE with Dedicated Management Radio Incorporating innovative industrial design with proprietary airMAX ac technology, the NanoBeamAC is ideal for CPE deployments requiring maximum performance from the smallest possible footprint. The NanoBeam ac Gen2 airMAX ac CPE with Dedicated Management Radio from Ubiquiti Networks offers a more reliable long-distance point-to-point connection. Boasting a maximum throughput up to 450 Mb/s, this NanoBeam radio is designed to filter out noise to reduce interference in areas congested with multiple RF signals while offering up to 19 dBi gain. Setup is simple, as the NanoBeam + +Price is $",150 +"How much does this cost to the nearest dollar? 
+ +WULF 4 Front 2 Rear Leveling Lift Kit with Spindles & Shackles compatible with Ford Ranger 2WD with Coil Spring Suspension +Compatible with Ford Ranger 2WD with Coil Spring Suspension FRONT WULF 4 Lift Ductile Cast Iron Spindles / Knuckles REAR 1.5-2 Adjustable Lift Black Powder Coated Shackles, Zerk-Grease Fittings, Pressed greaseable high grade poly bushings and metal sleeves included NOTE 2WD models only. Excludes models with Stabilitrak. Requires Coil Spring suspension Please see the description for full details, or contact us for assistance Fast Shipping. Manufacturers Lifetime Warranty. Dedicated Customer Service Manufacturer WULF Suspensions, Brand WULF Suspensions, Country of Origin USA, + +Price is $",250 +"How much does this cost to the nearest dollar? + +Alera ALEVABFMC Valencia Series Mobile B/f Pedestal, 15 7/8 X 19 1/8 X 22 7/8, Med. Cherry +Sturdy woodgrain laminate mobile box file pedestal to store all your office necessities. Full-extension ball bearing slides on file drawer for easy access. Durable laminate is water-, scratch-, and dent-resistant with 3 mm protective edge banding. Two fold-away safety keys included. Accepts Alera® Pedestal Cushions for an instant seating option. Sturdy woodgrain laminate mobile box file pedestal to store all your office necessities. Full-extension ball bearing slides on file drawer for easy access. Durable laminate is water-, scratch-, and dent-resistant with 3 mm protective + +Price is $",50 +"How much does this cost to the nearest dollar? + +YU-GI-OH! Ignition Assault Booster Box +24 Packs per Display 9 cards per packHumanitys greatest fear has been realized! Dueling A.I.s have become sentient and organized their own army to take the Yu-Gi-Oh! TRADING CARD GAME by storm in Ignition Assault! Winter 2020s booster set heats things up with Ais @Ignister cards from the climax of YuGi-Oh! 
VRAINS, multiple brand-new strategies, new cards for popular strategies, and powerful, general use cards that every Duelist will want to add to their arsenal! Keep your A. 24 Packs per Display 9 cards per pack Dimensions 5.65 x 4.75 x 1.5 inches, Weight 11.2 ounces + +Price is $",30 +"How much does this cost to the nearest dollar? + +48 x 36 Extra-Large Framed Magnetic Black Chalk Board (Black Frame) +Handsome, smooth 48 x 36 inches extra-large framed black chalk board. Perfect for office, meeting rooms, classrooms, at work or at home...to serve as black board, or magnetic board, or menu board, or bulletin board etc. Black frame. (Search for on Amazon if you want a Dark Brown wood tone frame or if you want a Medium Brown wood tone frame.) DELIVERY Shipped to continental U.S. addresses only. Handsome, smooth black board with elegant black veneer frame and reinforced backing. (If you want a dark brown wood tone frame, search for on Amazon; If you want a medium brown wood tone frame, search for ) Lean this light-weight black board + +Price is $",60 +"How much does this cost to the nearest dollar? + +Dell Latitude D620 Renewed Notebook PC +Dell Latitude D620 14.1 Laptop (Intel Core Duo 80GB Hard Drive, 2048Mb RAM, DVD/CDRW Drive, XP Professional) Windows XP Professional with Dell Reinstallation XP Pro. CD Intel Core Duo Processor 2GB DDR2 RAM 80GB Hard Drive Screen, Wifi Standing screen display size 14 Inches, Screen Resolution 1366 x 768 pixels, Processor 1.83 GHz RAM 2 GB DDR2, Memory Speed 1.83 GHz, Hard Drive 60 GB HDD, Chipset Brand Intel, Card Description Integrated, Wireless Type USB 2.0 Ports 3, Brand Dell, Series Dell Latitude, model number d620, Hardware Platform PC, Operating System Windows XP + +Price is $",150 +"How much does this cost to the nearest dollar? 
+ +acer Aspire 5 Laptop, AMD Ryzen 3 5300U Quad-Core Processor, 15.6 FHD IPS Display, 8 GB DDR4 RAM, 512 GB PCIe SSD, HDMI, Fingerprint, Wi-Fi 6, Backlit Keyboard, Windows 11 Home S Mode +Processor AMD Ryzen 3 5300U 4-Core Processor (8 Threads, 4MB L3 Cache, Up to Graphics AMD Radeon Operating system Windows 11 Home English Memory 8 GB DDR5 SDRAM Hard Drive 512 GB PCIe Solid State Drive Optical Drive No Display 15.6 FHD (1920 x 1080) LED-backlit, IPS Wide Viewing Angle, Slim Bezel, 16 9 aspect ratio 1 x USB 2.0 + +Price is $",400 +"How much does this cost to the nearest dollar? + +Elk 30 by 6-Inch Viva Pendant with Green Glass Shade, Satin Nickel Finish +The Viva light pendant is meticulously hand blown with up to three layers of uncompromising beauty and style. This pendant features green hand blown glass shade. Shade holder comes in satin nickel finish. Accommodates six medium base bulbs. Measures 9-inch extended length by width by 6-inch height. Viva light pendant is meticulously hand blown with up to three layers of uncompromising beauty and style Features exquisite line of green hand blown glass shade Accommodates six medium base bulbs Shade holder comes in satin nickel finish Measures 9-inch extended length by width by 6-inch height Brand Elk, Color Satin Nickel, Material Material Other, Style Contemporary, Light fixture form Pendant, Specific + +Price is $",60 +"How much does this cost to the nearest dollar? + +Barbie Top Model Doll +Amazon.com.caption font-family Verdana, Helvetica neue, Arial, serif; font-size 10px; font-weight bold; font-style italic; ul.indent list-style inside disc; text-indent -15px; Barbie is ready to hit the runway and show off the latest global fashions as a top model. While we all know Barbie’s occupational curiosity has led her down varied paths, this one is perfect for the slender fashionista. 
She comes wearing a trendy outfit of fishnet stockings, a tiered black miniskirt, a patterned top with a short sleeve shrug, and ankle boots. Her long blond hair is styled sleekly straight and she is carrying an animal print handbag. While this outfit is certainly catwalk worthy, it’s only + +Price is $",60 +"How much does this cost to the nearest dollar? + +Danby Designer 20-In. Electric Range with Coil Elements and Ft. Oven Capacity in Stainless Steel/Black +You dont need to be a world-class chef to enjoy cooking with this ultra-compact electric range by Danby Designer. Measuring only 20 inches wide, this stylish model is the ideal addition to trailers, cottages or efficiency apartments. It features a glass window on the oven door, plus angled front-mounted push and turn safety knobs with hot surface indicator lights. The lift-up porcelain cooktop has one 8-inch coil element for quick boiling and three 6-inch coil elements. Each element has a removable drip bowl for easy cleaning. The ft. electric oven has two oven racks with safety stops and four adjustable positions, plus a powerful broiler with 2400 watts of bro + +Price is $",200 +"How much does this cost to the nearest dollar? + +FixtureDisplays® Metal Truss Podium Double Width Modern Design +FixtureDisplays Metal Truss Podium Double Width Modern Design Churches & Other Venues Black truss is great to project a simple, clean and crisp look. Decorative truss panel design. Great for Churches, Schools, Hotels, Conferences, Funeral Homes, Stages, Debates, Wedding & Events, Restaurants Reception, Concierge etc. Easy screw aseembly. Contact us if u wish to order assembly service. Double-wide Full Size Pulpit Measurement 39 wide x 15.5 deep x 46.7 tall. Podium weighs 41 lbs. Reading panel comes with book stopper. Works great for two person services, or a larger room where a wider podium is proper. Sturdy Construction Made + +Price is $",250 +"How much does this cost to the nearest dollar? 
+ +ACDelco GM Original Equipment Alternator +ACDelco GM Original Equipment Light Duty Alternators have components that are newly manufactured, and are GM-recommended replacement for your vehicle’s original alternator. Alternators provide power to the vehicle's electrical systems and charge the battery while the engine is running. These original equipment alternators have been manufactured to fit your GM vehicle, providing the same performance, durability, and service life you expect from General Motors. 100% newly manufactured as an exact replacement for your GM vehicle’s original alternator Components are tested to meet original specification requirements for remarkable durability GM-recommended replacement part for your GM vehicle’s original factory component Offering the quality, reliability, and durability of GM OE Manufactured to GM OE specifications for fit, form, and function Dimensions 13.8 + +Price is $",200 +"How much does this cost to the nearest dollar? + +EBC Premium Street Brake Kit +Type Automotive Brake Save 10% from buying separate parts with EBC Brakes quality brake kit. High efficiency EBC pads with patented EBC Brake-in coating and premium rotors with thermic anti rust coating, fully balanced and run-out tested for smooth braking.Dimension 12 x 12 x 12 inchWeight 35.18 lbsManufacturer Warranty Covered by Manufacturer's Warranty Daily Driver Premium Brake Kit For Cars Truck Or SUV Quality British Made EBC Pads Premium G3000 OE Style Rotors Geomet Anti Rust Coating Manufacturer EBC Brakes, Brand EBC, Model EBC Brakes, Weight 40 pounds, model number Manufacturer Part Available May 19, 2012, Vehicle Service Type Car, Orientation Front + +Price is $",220 +"How much does this cost to the nearest dollar? + +FXR Men's Boost FX Jacket (Black/Orange/White - Large) +HydrX Pro - Shell - durable, sublimated 450d polyester shell with HydrX Pro laminate Boost LE Shell - M-Series Omni Stretch 450d polyester shell with HydrX Pro laminate F.A.S.T. 
90g insulation value in outer shell body, perforated at vent areas Lining - moisture-wicking quick-dry mesh lining FXR Dry Vent system - snowproof and moisture resistant chest side body vent system Removable liner - FXR Thermal Dry active liner with 175g Thermal Flex fill YKK Aquaguard front zipper HD #8 W/P zippers throughout Adjustable windskirt 360 3M Scotchlite reflective Shock-cord adjustable collar Shock-cord adjustable bottom + +Price is $",99 +"How much does this cost to the nearest dollar? + +SuperATV Scratch Resistant 3-in-1 Flip Windshield For CFMOTO ZForce 500 / 800 Trail / 800 EX / 1000 | 1/4 Thick Polycarbonate | USA Made | Can be set to Open, Vented Or Closed +Fits CFMOTO ZForce 500 | CFMOTO ZForce 800 Trail | CFMOTO ZForce 800 EX | CFMOTO ZForce 1000 | Can be used with most soft or hard tops | 100% Fitment Guaranteed Great For All Weather Want a CFMOTO Windshield that works in all conditions? Our 3-in-1 Flip-Up design allows you to choose from closed, vented, or open positions to ride comfortably in all-weather without having to + +Price is $",200 +"How much does this cost to the nearest dollar? + +SBU 3 Layer All Weather Mini Van Car Cover Compatible for Ford Windstar Minivan Model Years Breathable Automobile Van Protection +This Van Cover will provide all year round protection to your car. -It will efficiently shield your car’s paint from all finish-destroying agents sun, rain, snow, dust, dirt, tree sap and other corruptive elements. -The cover will minimize accidental bumps, dings, and scratches. You will save money on car washes, repair shops and will enjoy your ride in a brand-new looking car all year round. Investing in our quality cover is not only a practical move but also the perfect option for maintaining the car’s exterior. Condition Brand New, Color Gray, PACKAGE INCLUDES Brand New Van Cover.Free Storage Pouch, Antenna Patch. All + +Price is $",250 +"How much does this cost to the nearest dollar? 
+ +2 Pack Outdoor Brochure Holder Advertising Pamphlet Display Box with Lid Wall Mount Flyer Holder Acrylic Envelope Holder Waterproof Outdoor Brochure Box for Store Literature Display (Clear) +Features Fit for various occasions Our realtor flyer holders are ideal for literature, real estate advertisements, flyers, paper, letters, tickets, signature papers, etc., suitable for plenty of occasions indoor and outdoor, such as home, office, shopping malls, real estate companies, food stores, public places, business occasions, banks and so on. Warm to share These outdoor brochure boxes can also be applied as gifts for family members, friends, relatives, coworkers, neighbors, and other people you care about, so you can send them to show your love and concern, and to strengthen your relationships. Specifications Material acrylic Color clear Size + +Price is $",10 +"How much does this cost to the nearest dollar? + +Monroe Shocks & Struts Quick-Strut 171585 Strut and Coil Spring Assembly +Featuring a vehicle-specific design, Monroe® Quick-Strut® strut assemblies are fit checked, ride tested and engineered to restore factory ride height and ride performance. Assembled in Paragould, AR, they include all required components in a single unit. QUICKER, SAFER, EASIER AND COMPLETE REPAIR -- Includes everything you need for strut replacement in a single, fully assembled unit with no need for a spring compressor RESTORES RIDE HEIGHT -- Precisely calibrated to meet the OE design, each application-specific coil spring type is engineered to restore ride height and support the vehicle's weight VEHICLE-SPECIFIC DESIGN -- Application-specific coil spring, mount and strut designs ensure + +Price is $",80 +"How much does this cost to the nearest dollar? 
+ +Elements of Design Magellan Three Handle Tub and Shower Faucet, Oil Rubbed Bronze +Solid brass water way construction, Premium color finish resists tarnishing and corrosion, 2.5 GPM / 9.5 LPM at 60 PSI, 6-Inch reach Shower Arm, 1/4 turn washer less cartridge, IPS Inlets, Pressure Balance Valve, Temperature Check Stop.. Constructed from solid brass for durability and reliability Our corrosion and tarnish-resistant finishes provides long-lasting use Pressure Balance Valve; Fine Artistic Craftsmanship Max 2.0 LPM Water Flow Rate At 80 PSI On Showerhead Compliant with California Energy Commission Title 20 Brand Elements of Design, Color Oil Rubbed Bronze, Material Brass, Finish Type Oil Rubbed, Handles 3 + +Price is $",60 +"How much does this cost to the nearest dollar? + +GM Genuine Parts Air Conditioning Evaporator Core +ACDelco GM Original Equipment A/C Evaporator Cores are heat exchangers and are located in the HVAC housing, where they cool and dehumidify the cabin air. Refrigerant is metered into the evaporator by the orifice tube or expansion valve. This original equipment evaporator core is a GM-recommended replacement for your vehicle’s original components and has been manufactured to fit your GM vehicle, providing the same performance, durability, and service life you expect from General Motors. Channel-plate construction provides a high refrigerant contact surface area, resulting in better performance Vacuum-brazed, corrosion-treated, and leak-tested to help provide trouble-free operation GM-recommended replacement part for your GM vehicle’s original factory component Offering the quality + +Price is $",200 +"How much does this cost to the nearest dollar? 
+ +Baseus USB C Docking Station to Cast on 3 Monitors with 100W PD USB-C Port, 4K USB 3.0 * 5, LAN, SD/TF Cards Reader, Audio Port for Windows, Mac Laptop +Docking Station — Up to 16 ports allowing you to connect almost all devices through a single gear; 3 4K HDMI ports to cast different content on each display, PD Type-C to connect mobile devices, 3 USB 3.0 ports, 2 USB2.0 ports. Note Docking station requires a second power adapter through the PD USB-C port when charging your laptop through it Triple Extend to the Fullest — Boost productivity by casting up to 3 different contents on displays; Actual pixels up to when using + +Price is $",50 +"How much does this cost to the nearest dollar? + +Whitehall™ Personalized Whitehall Capitol Mailbox with Door & Side Address Plaques Personalized Mailbox (3 Colors Available) +After your order is placed, our friendly US based representatives will send a layout for your approval THREE COLORS AVAILABLE 1) Black with Gold Address 2) Bronze with Gold Address 3) White with Gold Address BOX DIMENSIONS - 9.625 X 13 X Approved by Postmaster General. Manufactured from die cast, high-density aluminum alloy The address Plaque can display up to five, 3 numbers and the bottom line holds up to sixteen, 1.25 characters. Material Aluminum, Included Security Features Hopper & Baffle, Brand Clarus Crystal, Dimensions 20.38\ D x 9.63\ W x 13\ H + +Price is $",200 +"How much does this cost to the nearest dollar? + +Pro Circuit Works Pipe for 02-19 YAMAHA YZ250 +The Original Pro Circuit Works Pipe Offers Unparalleled Performance and Power for Every Two-Stroke Application. Increased Horsepower and Torque Gains Will Quickly Be Noticed Across the Entire Rpm Range. The Unplated, Oiled Metal Finish Requires Some Maintenance, but Really Gives Your Bike That Works Look. Please Note The Image Displayed Is Representative of the Item, but May Vary Slightly Depending on Your Specific Model. 
Please Note The Image Displayed Is Representative of the Item, but May Vary Slightly Depending on Your Specific Model. Size YAMAHA YZ250 Style CARBON STEEL Color silver Warranty Pro-Circuit provides a 90-day warranty. See their site for full details. Manufacturer Pro Circuit + +Price is $",180 +"How much does this cost to the nearest dollar? + +HYANKA 15 1200W Professional DJ Speaker, Portable Pa System, Bluetooth Party Speaker with Subwoofer, Microphone and Speaker Set, Powered Pa Speaker System with Light, FM, TWS, USB, Remote, EQ +1. High Powered Active Professional DJ Speaker The B-15 has been finely tuned by our experienced engineer teams with 1200W P.M.P.O portable loud powered system with the HF Unit made of super titanium film with high sound without any distortion. Separate bass and treble controls on this Active Professional DJ Speaker allow for precise pitch tuning. This Bluetooth DJ speaker with subwoofer will give you incredibly loud sound crystal-clear treble and booming bass. 2. Multiple Easy Connections This large party powered pa speaker with 15 subwoofer can be connected + +Price is $",100 +"How much does this cost to the nearest dollar? + +Bluetooth X6BT Card Reader Writer Encoder Card Writer Device +Package Includes 1 x X6(BT) Card Reader Writer 1 x Software MINI CD 1 x Bluetooth dongle 1 x USB cord 20 x Blank Cards - X6 Bluetooth Card Readers Writer Encoder card swipe - World's Only Bluetooth Card Reader / Writer. - The World First Bluetooth Manual Swipe Smallest Card Reader/Writer is designed in USA to offer a card reading/writing solution. - for ISO 7811-6 formats, it’s Powered by USB directly not need for extra power adaptor. - Works with all the major operating systems as Windows 7, 8, 10, Vista, X, bits) and Apple Computers(MacBook Air, MacBook Pro, Mac Mini, Mac Pro, i + +Price is $",50 +"How much does this cost to the nearest dollar? 
+ +AIRAID Cold Air Intake System by K&N Increased Horsepower, Cotton Oil Filter Compatible with FORD (Excursion, F250 Super Duty, F350 Super Duty) +INCREASES HORESPOWER AIRAID performance air intake systems feature an aerodynamically-engineered intake tube, designed to accelerate airflow to your engine and reduce turbulence—helping increase your vehicle’s performance SUPERIOR FILTRATION AIRAID performance intake systems are engineered to provide a smooth, unimpeded path for airflow to your engine—keeping the air cooler and more oxygen-dense. Injecting more oxygen-rich air into the cylinders allows the engine to burn fuel more efficiently during combustion, offering you an increase in performance EASY INSTALL These simple-to-install, sophisticated systems help maintain proper air-to-fuel + +Price is $",150 +"How much does this cost to the nearest dollar? + +Bostingner Shower Faucets Sets Complete, Shower System 10 Inch All Metal Overhead Rain Shower Combo Set with Handheld Ceiling Mounted 3 Way Pressure Balance Shower Valve and Trim Kit, Matte Black +Ultra-Luxury Multi Shower - Unlike normal shower kit that can only use single function at a time, Bostingner Shower head system can be used both the rainfall shower head and handheld spray SIMULTANEOUSLY that strikes the perfect balance of GENEROUS COVERAGE and HIGH PRESSURE. The PUSH BUTTON design makes it easy to switch settings, so children and elderly can also use it without a problem Anti-scald & Water Hammer Prevention - The Upgrade cUPC Certified Anti-scald pressure balance valve is key to safe homes, effectively control water pressure balance to prevent scald and + +Price is $",100 +"How much does this cost to the nearest dollar? + +PIT66 Front Bumper Turn Signal Lights, Compatible with Mazda MX-5 Miata 1990 1991 1992 1993 1994 1995 1996 1997 W/Bulbs Smoked Lens Left Driver Side and Right Passenger Side. +Compatibility - Compatible with Mazda MX-5 Miata. 
Reference Oem Part Number Perfect Design - Smooth circular arc design, more beautiful appearance, tight fitting to the vehicle, unique lighting system, providing all-round lighting, evenly distributing light output in all directions, maximizing visibility. Safety - It can effectively help you to remind passing vehicles and identify whether the distance on both sides of the road is suitable for traffic in bad weather or other times of poor visibility, as well as determine the distance on both sides of the vehicle when + +Price is $",100 +"How much does this cost to the nearest dollar? + +Caseology Bumpy Compatible with Google Pixel Buds Pro Case (2022) - Sage Green +Hybrid layer clear case with ultra-clear PC body and TPU frame for drop-proof shock absorbance Slim yet durable Pixel Buds Pro case made with military grade materials Side colored TPU with rugged sandstone texture provides non-slip grip Wireless charging compatible and carabiner included for easy carrying Caseology Bumpy Compatible with Google Pixel Buds Pro Case (2022) / Not Compatible with Google Pixel Buds A series, Google Pixel Buds 2 Dimensions 2.67 x 2.54 x 1.15 inches, Weight 1.76 ounces, model number Rank Cell Phones & Accessories 34945, Cell Phone Basic Cases 14563, Connectivity technologies wireless, Special features + +Price is $",20 +"How much does this cost to the nearest dollar? + +Fleck 2510 Timer Mechanical Filter Control Head +- Mechanical 2510 control head for filter systems - - 12-day timer initiated backwash - - Maximum 17 GPM backwash (includes 7 GPM DLFC) - - Standard 2. 5 -8 NPSM mounting base - - Requires yoke or bypass to connect to plumbing -Heavy duty 2510 electromechanical control valve provides simple and durable backwash control for most common backwashing filters. A maximum of 17 GPM available backwash can handle even dense iron filter medias. Dedicated piston motor provides powerful piston movement that reduces system maintenance. 
New Fleck 2510 filter valve replacement for filter tanks Fully adjustable cycles for backwash and rinse times For back-washing filters, allows for strong + +Price is $",80 +"How much does this cost to the nearest dollar? + +Haloview MC7108 Wireless RV Backup Camera System 7'' Monitor Built in DVR Rear View Camera with Infrared Night Vision and Wide Viewing Angle for Truck/Trailer/RV/Pickups/Camping Car/Van/Farm +7 LCD digital monitor, Built-in recorder. Real time recording, video playback 10-32V wide voltage input, Support 4 wireless camera wide viewing angle, Wireless Line of Sight Range Up to 984 feet (This kit include 1 monitor+ 1 camera) Split mode, auto-scan mode and single-display mode available, Normal, mirror, FLIP, MIRROR-FLIP viewing options available HD 720P Digital Wireless Backup camera system, it has far better image resolution, stronger lens and longer transmission distance, which will bring you a + +Price is $",250 +"How much does this cost to the nearest dollar? + +Schmidt Spiele - Manhattan +The product of this place is a board game of Hans im Gluck's / Andreas Seyfarth work. It is with a Japanese manual in Japan version package of Mobius Games Inc.. Number of players 2-4 people Target age 10 years old or older Playing Time 60 minutes one point sales easy-to-understand rules, fun easy-to-understand, easy-to-understand strategy, the progress of the game can be seen in the eyes. It is a game that can be recommended to anyone. 2-4 people for Age 10 years old - adult Travel Time 60 minutes From Germany With a Japanese manual Dimensions 11.81 x 5.91 x 3.94 inches, Weight 10.6 ounces, model number + +Price is $",30 +"How much does this cost to the nearest dollar? + +Corsa 14333 Tip Kit (Ford Mustang GT) +CORSA Performance Pro-Series tips are dual-walled stainless steel. The dual-wall is designed to protect against heat distortion and maintain visual appeal. 
Each tip is adorned with high-definition, precision laser engraving that provides unmatched detail. CORSA Performance tip kits are designed to fit directly to CORSA exhaust systems. Mustang GT 5.0L Fits Premium Package ONLY Requires Roush Quad Tip Rear Valance Modification Dual walled design protects against heat distortion CORSA Performance tip kits are designed to fit directly to CORSA exhaust systems Premium Stainless Steel Construction Manufacturer Corsa Performance, Brand Corsa, Weight 7 pounds, Dimensions 23 x 11 x 7 inches, model number 14333, Is Discontinued No, Manufacturer Part + +Price is $",80 +"How much does this cost to the nearest dollar? + +Hoshizaki FM116A Fan Motor Kit 1 +Product Description Hoshizaki FM116A Fan Motor Kit 1 is a genuine OEM (original equipment manufacturer) replacement part. Hoshizaki is committed to developing original products that bring comfort and convenience to your life. Approved by original equipment manufacturer (OEM) and intended only for designed and specified use. From the Manufacturer FAN MOTOR KIT 1. Hoshizaki Genuine OEM replacement part. Hoshizaki is committed to developing original products that bring comfort and convenience to your life. Use genuine OEM parts for safety reliability and performance. Genuine OEM replacement part Hoshizaki is committed to developing original products that bring comfort and convenience to your life Genuine OEM provides safety, reliability, and optimal performance Approved by original + +Price is $",80 +"How much does this cost to the nearest dollar? 
+ +BAINUO Antler Chandelier Light Antler Deer Chandelier for Living Room Dining Room Balcony Bedroom Cafe Bar Cabin Hanging Light Fixtures +Specification Product Type Vintage Style Resin Deer Faux Antler Chandelier Style vintage/farmhouse/rustic/transitional/art deco Bulb Base 6* Not Included) Bulbs Category LED/INCANDESCENT/HALOGEN Product Rating Voltage 110V Maximum Wattage 40W for single lamp Weight 16.5LB Dimension Diameter 34.4 inches, Height 17.7 inches. Material Resin Color Brown Features antler chandelier also works on sloped, slanted or vaulted ceiling; 2.Deer horn 6 light pendant light wide application - The indoor dining table chandelier is perfectly used + +Price is $",280 +"How much does this cost to the nearest dollar? + +DNA MOTORING Smoke Lens Amber Headlights Replacement For 06-10 Explorer +A headlight (headlamp) is a lamp attached to the front of a vehicle to light the road ahead. Headlight performance has steadily improved throughout the automobile age, spurred by the great disparity between daytime and nighttime traffic fatalities. Headlights are one of the most important components of your vehicle; they allow you to see the road in front of you clearly during the night and in any bad weather that may arise. Our headlights upgrade the face of your vehicle with clear style and extreme range. Compatible with 06-10 Ford explorer. Plug-n-Play Operation, Direct Bolt-On OE Fitment or Replacement for the Stock Unit Uses H13 High Beam & Low Beam / Bulbs are NOT Included Brings a Different Appearance + +Price is $",100 +"How much does this cost to the nearest dollar? + +Wera Stainless 3840/1 TS 2.5mm Hex Insert Bit, Drive +Wera 3840/1 TS 2.5mm hex insert bit for 1/4 hex drive is designed to keep rust at bay. Wera’s Stainless tool line is manufactured from 100% Stainless steel, preventing extraneous rust caused by use of conventional tools contaminating stainless fasteners. 
Wera’s unique vacuum ice-hardened process gives the necessary hardness for industrial applications. Torsion (TZ) bits are designed to prevent premature wear for improved service life. Hex-Plus technology prevents rounding of screw recess and transfers up to 20% more torque. Stainless bits partnered with Wera’s stainless Rapidaptor will protect the full length of the bit against extraneous rust. Ice + +Price is $",30 +"How much does this cost to the nearest dollar? + +Celestron - PowerSeeker 127EQ Telescope - Manual German Equatorial Telescope for Beginners - Compact and Portable - Bonus Astronomy Software Package - 127mm Aperture & 1.25 Moon Filter +Celestron 1.25 Moon FilterThe Moon has the distinction of being the most often viewed celestial object through backyard telescopes. It is undoubtedly beautiful and mysterious, and is one of those constants in our lives that connects us to every other being on our planet. Regardless of who we are or where we live or travel, we all look at the same moon. It is difficult to look at the Moon through a telescope and see all of the details due to its brightness. Our eyes are not ready for the bright beam of light that emits from the eyepiece, and they “ + +Price is $",100 +"How much does this cost to the nearest dollar? + +NHOPEEW Android Car Radio Carplay for Jeep Wrangler 2015 2016 2017 Touchscreen Bluetooth Car Stereo with AHD Backup Camera/Mic/HiFi +Compatible with jeep wrangler 2015 2016 2017 Android 11 System The Android head unit fit for jeep wrangler 2015 2016 2017, it is plug&play, easy to install. Android 11 operating system, 2GB RAM & 32GB ROM, ensures smooth operation and faster response. Android Auto & Apple Carplay Support wireless and wired connect your phone to the carplay jeep wrangler radio, with Siri voice control allows you make calls, send and receive messages, enjoy music and Navigating. Large Screen with Hi-Fi The jeep wrangler + +Price is $",199 +"How much does this cost to the nearest dollar? 
+ +Other Harmonica A) +For the sound of yesterday from the technology of today, The new Suzuki 2 timers are sure to bring back memories. 2 timers have a traditional sound quality made possible by a dual hole octave/tremolo tuning action, Excellent for folk and country style playing. Laser tuned reeds are extra lightweight to give that special harmonica octave effect. Available in 21 and 24 note Models, Includes a soft lined hard Shell Case. Available in the keys of C & a. Excellent for folk and country style playing Package Dimensions 3.302 H x 18.541 L x 5.588 W (centimetres) Package Weight 0.349 pounds Country of Origin Japan Weight 4 Ounces, Dimensions 6 x 3 x 1 inches + +Price is $",40 +"How much does this cost to the nearest dollar? + +Harley Air Filter Venturi Intake Air Cleaner Motorcycle Cnc Cut Chrome Kit for Touring Street Glide 2008 - 2016 Softail 2016 - 2017 Fitment - C (Gray) +Package x Air Cleaner Intake Filter with Accessories Fitment for Harley Touring Street Glide 2008 - 2016, Touring Road Glide 2008 - 2016, Softail 2016 - 2017, Dyna FXDLS 2017, FLSTNSE 2014 - 2015, FLSTSE 2011 - 2012, FXSBSE 2013 - 2014 NOTE Before purchase, please check electric or non-electric throttle on your Touring models. If Electric - use fitment C. If non + +Price is $",100 +"How much does this cost to the nearest dollar? + +Elite Screens Edge Free Ambient Light Rejecting Fixed Frame Projection Projector Screen,Aeon CineGrey 3D Series, 16 9 for Home Theater, Movie and Office Presentations +DIMENSION SIZE Diagonal, 16 9 Aspect Ratio. View Size 66.2 H x 117.6 W. Overall Size 66.7 H x 118.1 W x 1. 3 D. SCREEN MATERIAL Ceiling Ambient Light Rejecting Material (CLR/ALR) CineGrey 3D with 65% rejection ratio and Features a 90° Viewing Angle with 1.2 Gain. It is best for family rooms, educational facilities, conference rooms or any applications in which incident light is a factor. 
Fixed Frame Projector Screen is compatible with Standard Throw + +Price is $",250 \ No newline at end of file diff --git a/week6/community-contributions/lisekarimi/helpers/__init__.py b/week6/community-contributions/lisekarimi/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/week6/community-contributions/lisekarimi/helpers/items.py b/week6/community-contributions/lisekarimi/helpers/items.py new file mode 100644 index 0000000..a594e27 --- /dev/null +++ b/week6/community-contributions/lisekarimi/helpers/items.py @@ -0,0 +1,120 @@ +from typing import Optional # A variable might be a certain type or None +from transformers import AutoTokenizer +import re + +BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B" + +MIN_TOKENS = 150 # Minimum tokens required to accept an item +MAX_TOKENS = 160 # We limit to 160 tokens so that after adding prompt text, the total stays around 180 tokens. + +MIN_CHARS = 300 # Reject items with less than 300 characters +CEILING_CHARS = MAX_TOKENS * 7 # Truncate long text to about 1120 characters (approx 160 tokens) + +class Item: + """ + An Item is a cleaned, curated datapoint of a Product with a Price + """ + + # Load tokenizer for the model + tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) + + # Define PRICE_LABEL and question for the training prompt + PRICE_LABEL = "Price is $" + QUESTION = "How much does this cost to the nearest dollar?" 
+ + # A list of useless phrases to remove to reduce noise for price prediction + REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "] + + # Attributes for each item + title: str + price: float + category: str + token_count: int = 0 # How many tokens in the final prompt + + # Optional fields + details: Optional[str] # The value can be a string or can be None + prompt: Optional[str] = None + include = False # Whether to keep the item or not + + def __init__(self, data, price): + self.title = data['title'] + self.price = price + self.parse(data) + + def scrub_details(self): + """ + Removes useless phrases from details, which often has repeated specs or boilerplate text. + """ + details = self.details + for remove in self.REMOVALS: + details = details.replace(remove, "") + return details + + def scrub(self, stuff): + """ + Clean up the provided text by removing unnecessary characters and whitespace + Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers + """ + stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip() + stuff = stuff.replace(" ,", ",").replace(",,,",",").replace(",,",",") + words = stuff.split(' ') + select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)] + return " ".join(select) + + def parse(self, data): + """ + Prepares the text, checks length, tokenizes it, and sets include = True if it’s valid. + """ + # Builds a full contents string by combining description, features, and cleaned details. 
+ contents = '\n'.join(data['description']) + if contents: + contents += '\n' + features = '\n'.join(data['features']) + if features: + contents += features + '\n' + self.details = data['details'] + if self.details: + contents += self.scrub_details() + '\n' + + # If content is long enough, trim it to max char limit before processing. + if len(contents) > MIN_CHARS: + contents = contents[:CEILING_CHARS] + + # Clean and tokenize text, then check token count. + text = f"{self.scrub(self.title)}\n{self.scrub(contents)}" + tokens = self.tokenizer.encode(text, add_special_tokens=False) + + if len(tokens) > MIN_TOKENS: + # Truncate tokens, decode them back and create the training prompt + tokens = tokens[:MAX_TOKENS] + text = self.tokenizer.decode(tokens) + self.make_prompt(text) + + # Mark the item as valid and ready to be used in training + self.include = True # Only items with MIN_TOKENS <= tokens <= MAX_TOKENS are kept + + + def make_prompt(self, text): + """ + Builds the training prompt using the question, text, and price. Then counts the tokens. + """ + self.prompt = f"{self.QUESTION}\n\n{text}\n\n" + self.prompt += f"{self.PRICE_LABEL }{str(round(self.price))}.00" + self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False)) + + def test_prompt(self): + """ + Returns the prompt without the actual price, useful for testing/inference. + """ + return self.prompt.split(self.PRICE_LABEL )[0] + self.PRICE_LABEL + + def __repr__(self): + """ + Defines how the Item object looks when printed — it shows the title and price. 
+ """ + return f"<{self.title} = ${self.price}>" + + + + + \ No newline at end of file diff --git a/week6/community-contributions/lisekarimi/helpers/loaders.py b/week6/community-contributions/lisekarimi/helpers/loaders.py new file mode 100644 index 0000000..4314c65 --- /dev/null +++ b/week6/community-contributions/lisekarimi/helpers/loaders.py @@ -0,0 +1,106 @@ +from datetime import datetime # Measure how long loading takes +from tqdm import tqdm # Shows a progress bar while processing data +from datasets import load_dataset # Load a dataset from Hugging Face Hub +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor # For parallel processing (speed) +from items import Item + +CHUNK_SIZE = 1000 # Process the dataset in chunks of 1000 datapoints at a time (for efficiency) +MIN_PRICE = 0.5 +MAX_PRICE = 999.49 +WORKER = 4 # Set the number of workers here + +class ItemLoader: + + def __init__(self, name): + """ + Initialize the loader with a dataset name. + """ + self.name = name # Store the category name + self.dataset = None #Placeholder for the dataset (we load it later in load()) + + def process_chunk(self, chunk): + """ + Convert a chunk of datapoints into valid Item objects. + """ + batch = [] # Initialize the list to hold valid items + + # Loop through each datapoint in the chunk + for datapoint in chunk: + try: + # Extract price from datapoint + price_str = datapoint['price'] + if price_str: + price = float(price_str) + + # Check if price is within valid range + if MIN_PRICE <= price <= MAX_PRICE: + item = Item(datapoint, price) + + # Keep only valid items + if item.include: + batch.append(item) + except ValueError: + continue # Skip datapoints with invalid price format + return batch # Return the list of valid items + + + def load_in_parallel(self, workers): + """ + Split the dataset into chunks and process them in parallel. 
+ """ + results = [] + size = len(self.dataset) + chunk_count = (size // CHUNK_SIZE) + 1 + + # Build chunks directly here (no separate function) + chunks = [ + self.dataset.select(range(i, min(i + CHUNK_SIZE, size))) + for i in range(0, size, CHUNK_SIZE) + ] + + # Process chunks in parallel using multiple CPU cores + with ProcessPoolExecutor(max_workers=workers) as pool: + for batch in tqdm(pool.map(self.process_chunk, chunks), total=chunk_count): + results.extend(batch) + + # Add the category name to each result + for result in results: + result.category = self.name + + return results + + + def load(self, workers=WORKER): + """ + Load and process the dataset, returning valid items. + """ + # Record start time + start = datetime.now() + + # Print loading message + print(f"Loading dataset {self.name}", flush=True) + + # Load dataset from Hugging Face (based on category name) + self.dataset = load_dataset( + "McAuley-Lab/Amazon-Reviews-2023", + f"raw_meta_{self.name}", + split="full", + trust_remote_code=True + ) + + # Process the dataset in parallel and collect valid items + results = self.load_in_parallel(workers) + + # Record end time and print summary + finish = datetime.now() + print( + f"Completed {self.name} with {len(results):,} datapoints in {(finish-start).total_seconds()/60:.1f} mins", + flush=True + ) + + # Return the list of valid items + return results + + + + \ No newline at end of file diff --git a/week6/community-contributions/lisekarimi/helpers/testing.py b/week6/community-contributions/lisekarimi/helpers/testing.py new file mode 100644 index 0000000..9422182 --- /dev/null +++ b/week6/community-contributions/lisekarimi/helpers/testing.py @@ -0,0 +1,84 @@ +import math +import matplotlib.pyplot as plt + +GREEN = "\033[92m" +YELLOW = "\033[93m" +RED = "\033[91m" +RESET = "\033[0m" +COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN} + +class Tester: + + def __init__(self, predictor, data, title=None, size=250): + self.predictor = predictor + 
self.data = data + self.title = title or predictor.__name__.replace("_", " ").title() + self.size = size + self.guesses = [] + self.truths = [] + self.errors = [] + self.sles = [] + self.colors = [] + + def color_for(self, error, truth): + if error<40 or error/truth < 0.2: + return "green" + elif error<80 or error/truth < 0.4: + return "orange" + else: + return "red" + + def run_datapoint(self, i): + datapoint = self.data[i] + guess = self.predictor(datapoint) + truth = datapoint["price"] + error = abs(guess - truth) + log_error = math.log(truth+1) - math.log(guess+1) + sle = log_error ** 2 + color = self.color_for(error, truth) + title = datapoint["text"][:40] + "..." if len(datapoint["text"]) > 40 else datapoint["text"] + self.guesses.append(guess) + self.truths.append(truth) + self.errors.append(error) + self.sles.append(sle) + self.colors.append(color) + # print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}") + + def chart(self, title): + max_error = max(self.errors) + plt.figure(figsize=(15, 6)) + max_val = max(max(self.truths), max(self.guesses)) + plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6) + plt.scatter(self.truths, self.guesses, s=3, c=self.colors) + plt.xlabel('Ground Truth') + plt.ylabel('Model Estimate') + plt.xlim(0, max_val) + plt.ylim(0, max_val) + plt.title(title) + + # Add color legend + from matplotlib.lines import Line2D + legend_elements = [ + Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8), + Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8), + Line2D([0], [0], marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8) + ] + plt.legend(handles=legend_elements, loc='upper left') + plt.show() + + def report(self): + average_error = sum(self.errors) / self.size + rmsle = math.sqrt(sum(self.sles) 
/ self.size) + hits = sum(1 for color in self.colors if color=="green") + title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%" + self.chart(title) + + def run(self): + self.error = 0 + for i in range(self.size): + self.run_datapoint(i) + self.report() + + @classmethod + def test(cls, function, data): + cls(function, data).run() \ No newline at end of file diff --git a/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb b/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb new file mode 100644 index 0000000..6a10dd6 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part5_llama31_8b_quant.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "4WDyBU0Vm0Zl" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 5)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- ➡️ Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🦙 Part 5: Evaluating LLaMA 3.1 8B Quantized\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab\n", + "- 🛠️ Requirements: 🔑 HF Token\n", + "- Tasks:\n", + " - Quantize LLaMA 3.1 8B to 4-bit\n", + " - Define prediction function\n", + " - Evaluate with Tester\n", + "\n", + "We know LLaMA 3.1 won’t beat frontier models — but how far behind is it without any tuning?\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MDyR63OTNUJ6", + "outputId": "7e9e5b6b-d11c-45df-d774-2da5f6455d51" + }, + 
"outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets torch transformers bitsandbytes accelerate matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import re\n", + "import math\n", + "import torch\n", + "from huggingface_hub import login\n", + "from datasets import load_dataset\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n", + "from google.colab import userdata\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uuTX-xonNeOK" + }, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "hf_token = userdata.get('HF_TOKEN')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "\n", + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "HF_USER = \"lisekarimi\"\n", + "DATASET_NAME = f\"{HF_USER}/pricer-data\"\n", + "\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DTMo_1msQb9X" + }, + "source": [ + "## 📥 Load Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "048d1b454cdc400ea5979230703770b8", + "7dd26897141a4d87bc3893bb1b1bafb3", + "c4f6e0b6237c46b98d393d27b35cabe7", + "dd525aced49e4ebe8395514601c20b20", + 
"69a35e3916ae488296a70687b5e890de", + "e3442871bdb5445ba86e5aa0f0ee8de9", + "8981816dd709488e9ace85e6b160892d", + "6edd1bd723324c128fec4de5f1758330", + "f77a98060e9d48dc8ac4919902ffc88d", + "ed6dfe86de2e4957b4e24df3f564c5db", + "e253ec71b5104dd291455753a160c7f1", + "b802074124bc4d7d979c28ba9e84a432", + "76abd56919414e2b8b2b4683d4cb2bd5", + "2bb62653ed2d4e86b9eb0476a0333a3a", + "58a799a559ff4f2681b586650c35b12e", + "878d6cea9b2c40d3b3b58b1c1bff902f", + "d00a41c676034c38881da90ae961e936", + "da539e354ea540509a2ea7d13dc8dc45", + "4f27fc91cbb14aa08d08b848c6689937", + "abea7cdfa8624614aca8d8ab3c07a671", + "b5ed6e3c852c49c1b904a19e05f5a90b", + "ac7225138dfa48b086b30f154f9a1111", + "70da4d47bd4c4b57a7f65d82d7a01829", + "19f8ee6f626845beaa7154efe4802045", + "13556136763b49bda041c92445ee2ad4", + "a2f5735e3c314155be432484fcf72fe7", + "81c15499cc8e4011b9bd392f660a3b6e", + "96b6a830727d48539c181343efada938", + "0ead4e0b3435492693636130d2782c25", + "50a8e3f2c06c4595931788b18f5152e9", + "cebc935dafae4d4eba105d3107c46ddc", + "f566351d5c504181b00a53c3c654090f", + "4238f42b624142fea3746fb2f03bcc2d", + "be8416b487d04d769fd93973b7fe916a", + "1bbada4a48444e60a360aba596af77b3", + "75436fb8e4eb43e4b0a309871e4d3cc1", + "e4ae815e69d3448296e4c3bcb713710e", + "72ca180ed6604f148f2f2e61ac97259a", + "c0b34963c7a446908fadfb38c958b612", + "38fa12125f024935852122d434c2cbf6", + "ff564729da354497b606bfb809ac4e33", + "9ee352287f8b4e27af617e3427cb3012", + "15522242cf72440ca8895496ad5144c4", + "e7dc05ebb11a4b30b4806c2628ec6bde", + "413fafe61f7c415a9c1c90dea56aa301", + "ac2522256e73492d9b5d0e7976d92ff5", + "53266635573042b4b94496f38915e6d4", + "8da5f5529fef4f1bb884793e503e5fc5", + "80b7529e0ad541749bf464a1d8927225", + "2203154b7c464105b12f1ef8caf410cd", + "87b8c46fa98a4940ab90422ab44d33de", + "6dee11eaa4a849bbb58488a233d3719e", + "be947f2a2b8a484daa61f45ae06c5232", + "0b1de2365ad1497ab2ebfec1be33a720", + "326453121cd84c1e95b3b5da0166c931" + ] + }, + "id": "5PVXACKHQhh4", + "outputId": 
"80dc4772-ea31-4752-8f97-573efaa43917" + }, + "outputs": [], + "source": [ + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pGJR24lbQlRS", + "outputId": "a1bb5e66-1aa9-40b7-c361-562eafae5d8c" + }, + "outputs": [], + "source": [ + "test[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vrxH6h00P9qc" + }, + "source": [ + "## 🦙 Load Tokenizer and Quantized LLaMA Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 418, + "referenced_widgets": [ + "8698049df4ce440d8a16bc34d69231e7", + "b0f74f9ed33649ebb16952d0fb0aecab", + "00241684d4f64383b032a1362d174d55", + "b74ea8552d8145d28f43cf7ad8450d0b", + "6838953f363945d79e079c12aeb2232b", + "57c4a682571e401f9fec996603fa587d", + "e2b93774cb4a4648a4210c7693864ccc", + "766c8e2406ce407faf3489229dada75a", + "a52ea6c6672b4ab2bae4f669dc45deb6", + "381094cbfef64531a58df85e6d453847", + "a6a14894a06a4d4b839f3b791cfadd34", + "b43b118cae174c3c810f08c2249b80dc", + "6502e59cbd344bf6be966923bb3e38fa", + "f86fcf5f6bf24ce399c5d64dc83c3690", + "46598b397fbc4461bb83ad45000e5569", + "dd307f2b56824c6c91b8fe1c028a1704", + "b87577323cb44a2f9f3cc0a8219eadf4", + "0ed52815268a4d90a224029e9e23e09f", + "fafd3011a3cb4e0099a0db77caf843aa", + "89bb151875e349118677c9677bec4137", + "b8722d98e81d4e3494c9c0b8b01319d4", + "b8f661dd658e45a49b5bac2bdf7f3b78", + "f031fadfbba14031a944c351fd99b032", + "a6bf09a98289481abb0c9882702eb575", + "09d54c271e55463495cb9b617d2ec41d", + "32bdcc7f7efd47679258f398184dd0ab", + "0f090959768e40aa8fabfacdf772d8df", + "e40a9c85694c446d84d3493274138178", + "ceb1bbb613f0438aa6996d5551f713f3", + "59b59d4040e04b65a66f578160d13d43", + "079ab8a1fae64a0782ea8035f494f2fd", + "1e425099c1044c14986386e5a4ce0b48", + 
"1e3e028ec53448c691abb2cfad4ffd8c", + "d2142e9de5ea4dd2b8d00b56354352c3", + "cf1d7a58189b4a5bbe6d005de998548c", + "5b6e10b9e5a146be85e519c1bc476fcd", + "6d9feded70b84654ab79f9d13b065c83", + "ec5afa847d364fbc974b35d821ccd931", + "fa392b34ae8647668e94aca22c1a5edb", + "4d315c92d7c84191a165218d403c0f8d", + "c4279579368841b99a46f529c55125e9", + "572bb151402e4940a8d7c92156f9711b", + "d4536337378b4146b054371b18f83fdc", + "14ddae4a15d74c80b1712443853e3f96", + "46e827e5d7a94a619536ff08127b6172", + "ad7977cc642e4cae890e52d03d753788", + "73ba16b54d314d94aaeebea2ba291a94", + "1bc2120a87ab4ad99798b1706342bd89", + "4c4772ace8c246b9a5c8f870ed27c11b", + "d179e366c02f4bb2897cc9f531955e5e", + "1134fd00384740d0a39b6de241ca17cd", + "5a120718aa934959bd50cf4864b137b4", + "46917a8997f942fea0aaf00a95459f93", + "6076b184b66c4d50a91bc477c8eea53e", + "2538bc7fc4594363934266f25bcd52bc", + "f6144dfa2a20416b9e5c28615a5ff129", + "35941d364234488da6fcc0997a5cccf7", + "8c5e160cc4434ca99f694f5e195a2005", + "fa82a4c6e8fc4591aa5652d7d95c6e40", + "2a82034aebcb4e3fbaff825ca59817d0", + "edcabc56841a4ba68ee53385fe2dc0f6", + "e8bf66aa640e433d8d890ae541b21dcd", + "eb7b76b25ce44dc1b8eba7cac8bc9671", + "c9648aceda71470284f6ed7ce2add462", + "7df6ddb46ae3419fbc3fac488eb8a6b6", + "3b2aa7ca49e4451fbfc65560a2d3d43d", + "6af2831aa4e641568d72df6d13fc074a", + "732cb67ef916489298655df845773934", + "cc96573e39e148dbac8b0bd299f0f0b0", + "4e9cbcdc1cfb495a850be45cf752d3c4", + "b811921bdbe84b0dbd9add0f69271ef0", + "a1053fdea18348119949b326f3a12651", + "90c10be928c54821aacf11705c0513ff", + "a543366ec93c486bb2d28d1ff9567197", + "97ff48d2660444a1a7503e735e2b2a55", + "1fd84a85c98246adb2e18e41c8a9d88f", + "fed4a63b10ea4788af8cd181d8d24863", + "8bd5f65dda734db1a253897f85428d4c", + "2744cec152a44fd483f5cdd8f4de8c70", + "47780d4dab77454ab898f6707d8d4168", + "6653b71e07bc488ebbb4ed5728564ccd", + "59d44fcd08114cb4aaeea768b1438bcf", + "18cf08eb051d48c9a5c0b6b827507b7f", + "fee935f9fb354a67a37d42641ff0d81e", + 
"22ec450031234856a304ccee34d452f4", + "4944567015cc46be83a8524c0542722a", + "b80d78f92da64255991b4fcfde98b1d4", + "e239b0fe8301409f9dd7e5e801949ec2", + "6e7533e6b43c4f1dbb1e0421b99fdc47", + "4be15c8712e340b3b9d9a3bd1c7c7516", + "337d98c0886948929a48411422a81ff8", + "83d8d49cf93c4af7bb3e3cfa3234c6c6", + "a1fb4ca7292e4cdc85b522248fdddaf6", + "ce26e74cc006450ca4e44bee2d14d80c", + "c01c7b35b1914ae681550421c0035a8c", + "c7966f356f80422abb3dcb45dbc541db", + "a86bb39581e1430a8314a616951af75e", + "446c7ef56bcc437388d4a99859c1b9fb", + "5c16ffe6a5504f2585aa6bc3132ff2ee", + "9d466ef4939c43f2846f22a5a21e5cd0", + "bec4a9e185074743848c04c4aff12037", + "cff028485bae4d96b4f7a48b738f6b61", + "c02af33357e64469aeb01a7af5a9ab37", + "fd0ff0c9933d4238a373c286f8e1dd5d", + "c517e6db93f04398b9a3ccc86e090499", + "e75b68d16bc443e39974922342952de9", + "0536d41437f54df38624a7d290e45325", + "eec3717367b348a388bb76eb6482ce25", + "e16f1ef5ee06493fac2d5871806a3b3a", + "24f7575f0f47498480b2a2f79f0d4ce5", + "c17fe53a4a2b4266a3cbd24c9f145cde", + "b4d715f23ada4ee48fdfd9af463f7124", + "7d9102b6a7b44e14809ecf8fa421ee70", + "8d640aa311f34b33b0967e128c138130", + "3680065e53494bad98e74fd7c81185dd", + "11fb0bacbcc44352b3b25d9f0923c332", + "7319ccdb3e3349328d6f9b4bb5445776", + "35017cc6cb484eeaa12714532e872f99", + "ae22d146f6f24981bde97896ad3d8b14", + "3feeca46c382431c9868e4852ca04d49", + "0e3b239635704ab391f1801b762b7f93", + "90d35d2eda00413eba027093309f6c31", + "d61446d3664a455baaada9761a1715be", + "92b7d7f81ebc441d8e6d6e20477aa37c", + "8b5b230489104f6bba63720fa9fad0ae", + "0afc992c54ec4a10a7f9fd3e45fa7761", + "4fbbb9ba6f4e44d6b2ccc5197dad5488", + "18e5f93ef3b64301b7c1548d17843d64", + "ee997f8eeccc4dd98aea71b930531cf5", + "e11a6cfa4615457090d4c87815fdb716", + "4d74f0ec93f54e09a22c3cb93a042570", + "31cdf14402f34270bdc1b1efd2a0d011" + ] + }, + "id": "TAit9IzsQLcc", + "outputId": "176a77ad-0245-4a3d-b9f3-e139de359da7" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " 
load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=quant_config,\n", + " device_map=\"auto\",\n", + ")\n", + "base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n", + "\n", + "print(f\"Memory footprint: {base_model.get_memory_footprint() / 1e9:.1f} GB\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🤖 Prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1FfMJ2JbzEr3" + }, + "outputs": [], + "source": [ + "def extract_price(s):\n", + " if \"Price is $\" in s:\n", + " contents = s.split(\"Price is $\")[1]\n", + " contents = contents.replace(',','').replace('$','')\n", + " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", contents)\n", + " return float(match.group()) if match else 0\n", + " return 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CgN8eRttRAZx" + }, + "outputs": [], + "source": [ + "def model_predict(prompt):\n", + " set_seed(42)\n", + " inputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(\"cuda\")\n", + " attention_mask = torch.ones(inputs.shape, device=\"cuda\")\n", + " outputs = base_model.generate(inputs, max_new_tokens=4, attention_mask=attention_mask, num_return_sequences=1)\n", + " response = tokenizer.decode(outputs[0])\n", + " return extract_price(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hpdEk2-FW6aT", + "outputId": "f8913c56-1a8f-4a13-9084-21acfdb64ceb" + }, + "outputs": [], + "source": [ + 
"model_predict(test[0]['text']), test[0]['price']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "auFzPUJKTLln" + }, + "source": [ + "## 🧪 Run Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jnoI1EWGTUau" + }, + "outputs": [], + "source": [ + "# Helper class for evaluating model predictions\n", + "\n", + "GREEN = \"\\033[92m\"\n", + "YELLOW = \"\\033[93m\"\n", + "RED = \"\\033[91m\"\n", + "RESET = \"\\033[0m\"\n", + "COLOR_MAP = {\"red\":RED, \"orange\": YELLOW, \"green\": GREEN}\n", + "\n", + "class Tester:\n", + "\n", + " def __init__(self, predictor, data, title=None, size=100):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or predictor.__name__.replace(\"_\", \" \").title()\n", + " self.size = size\n", + " self.guesses = []\n", + " self.truths = []\n", + " self.errors = []\n", + " self.sles = []\n", + " self.colors = []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error<40 or error/truth < 0.2:\n", + " return \"green\"\n", + " elif error<80 or error/truth < 0.4:\n", + " return \"orange\"\n", + " else:\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " datapoint = self.data[i]\n", + " guess = self.predictor(datapoint[\"text\"])\n", + " truth = datapoint[\"price\"]\n", + " error = abs(guess - truth)\n", + " log_error = math.log(truth+1) - math.log(guess+1)\n", + " sle = log_error ** 2\n", + " color = self.color_for(error, truth)\n", + " # title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " # print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n", + "\n", + " def chart(self, title):\n", + " # max_error = max(self.errors)\n", + " 
plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Model Estimate')\n", + " plt.xlim(0, max_val)\n", + " plt.ylim(0, max_val)\n", + " plt.title(title)\n", + "\n", + " # Add color legend\n", + " from matplotlib.lines import Line2D\n", + " legend_elements = [\n", + " Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8)\n", + " ]\n", + " plt.legend(handles=legend_elements, loc='upper right')\n", + "\n", + " plt.show()\n", + "\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for color in self.colors if color==\"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n", + " self.chart(title)\n", + "\n", + " def run(self):\n", + " self.error = 0\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, function, data):\n", + " cls(function, data).run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 692 + }, + "id": "1wA5uVgpTWLC", + "outputId": "5a597437-50c8-419c-c1da-af0166dabe0f" + }, + "outputs": [], + "source": [ + "Tester.test(model_predict, test)" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": { + "id": "lSfPbebIq2Ml" + }, + 
"source": [ + "![image.png](attachment:image.png)\n", + "\n", + "Alright — now that we know where things stand, it’s time to shake things up.\n", + "\n", + "Can QLoRA fine-tuning unlock the true power of LLaMA 3.1?\n", + "\n", + "👀 Let’s find out... in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part6_ft_llama_qlora.ipynb)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb b/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb new file mode 100644 index 0000000..af4b5e7 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part6_ft_llama_qlora.ipynb @@ -0,0 +1,907 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 6)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- ➡️ Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# ⚙️ Part 6: Fine-Tuning LLaMA 3.1 with QLoRA\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab (A100)\n", + "- 🛠️ Requirements: 🔑 HF Token, wandb API Key ([Weights & Biases](https://wandb.ai))\n", + "- Tasks:\n", + " - Load and split dataset (Train/validation); set up 
[Weights & Biases](https://wandb.ai) logging\n", + " - Load quantized LLaMA 3.1 8B and tokenizer\n", + " - Prepare data with a collator for fine-tuning\n", + " - Configure QLoRA (LoRAConfig), training settings (SFTConfig), and tune key hyperparameters\n", + " - Fine-tune and push best model to Hugging Face Hub\n", + "\n", + "⚠️ I attempted to fine-tune the model on the full 400K dataset using an A100 on Google Colab, but it consistently crashed. So for now, I’m training on a 20K subset to understand the process, play with hyperparameters, track progress in Weights & Biases, and push the best checkpoint to the Hub.\n", + "\n", + "⏱️ Training on 20,000 examples took over 2 hours.\n", + "\n", + "The full model fine-tuned on the complete 400K dataset is available thanks to our instructor, [Ed](https://www.linkedin.com/in/eddonner) — much appreciated! \n", + "We’ll dive into that model in the next notebook — **stay tuned** 😉\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MDyR63OTNUJ6", + "outputId": "525372ce-f614-44f1-b894-80e289958197" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets transformers torch peft bitsandbytes trl accelerate wandb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import torch\n", + "import wandb\n", + "from google.colab import userdata\n", + "from datetime import datetime\n", + "from datasets import load_dataset\n", + "from huggingface_hub import login\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, EarlyStoppingCallback\n", + "from peft import LoraConfig\n", + "from trl import 
SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B48QsPsvUs_x" + }, + "source": [ + "## 🔀 Load Dataset from HF and Split into Train/Validation" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets (for Google Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "6f1f8dca2a334818a36fae380818001e", + "6d3be1ece4a949d3b8d3736db02bcb5c", + "c8c6bbacfe254c539f4acda8cdd5c04d", + "db87c136ff15430892aa75fa47521b0c", + "1d56af1140034021b2aecc5df846e499", + "6238783102084e0c99626bf948ff5bb6", + "f523b67e652049f7b13131d2750325bb", + "f03cc2cf18c140c8b4a076ab99ac86e3", + "472bb957b0e149df8ef0c26c3a3ffc19", + "86dfcc161f2d41a7a33041848766d091", + "6a7ed9e79ebb4f9c9962d08c78b424ca", + "efc4817d5f734852a844640ebe7eceed", + "0b473a8e944c4b028f51f53f62b72deb", + "1fd89859568440f58f3ab56f32183dd4", + "2e4bd8853acc4faa92e461210df2c689", + "3fb588f271db4b7abb9a3631582cc7d6", + "8f9c00ca63ca47e9873ec2a743fa1512", + "afdae504b36845b9a98874cced112721", + "8afd0ddfdeca43b59207a8b35a35e13c", + "0be7a6fdb206420d88b2b2e45a37432c", + "00f0983c1d204862b589011100297ffe", + "8c7de85bcec742ec85f1e8b854351056", + "5847c75b6dd74bc1b13116d91431ccf2", + "bcb0ad86493f45848895c02c0b9deaf6", + "18d70754531248b1ab22e1fd0df061ae", + "028d806f909f42e2b6a7ec630f6e3cb5", + 
"ff00d3192c734b398f779c7fffde57c8", + "55388dcb89f84c7ebe7f5f7051f2d98b", + "d3cab2b162a740fb82f78f030ea32b45", + "cea0149336be4c92952bacb8aa820926", + "6b560f8a028c4ba39896fd97f48f18ad", + "2a3ed922dab44648b6d6ed63e21c549d", + "885e1f4b9c3d45d5acd8d0a368ca557d", + "73e42dca7c4b455f8be4b34236e6ced2", + "c36aec28025e4baab8a3c4a293297f15", + "7569e26e1e2b46e4a7018e1bd2bc92d5", + "9f5795d223e74f1e8e49709ec1e4ddf1", + "5638ccb893164fc79980eb48d06909f9", + "70a528a0a08e4931b845ecc0992e07d6", + "669bbecd55804849bff5a850438d905d", + "245de1eaef2840b69e6c82afee68b4dc", + "ad57405b8f474c0aa92833f83dde70e8", + "cb3391329a7f4d0b93f5efffb9b0dcfe", + "cb0007dffa284be8aff41efacdfc31cb", + "c7de048747a24f9a9ce85396b87b8250", + "066b3f278ec24b299504cea66b3c3e63", + "0e1069c5bf644531902c51283a6d68e1", + "06bd7477f9fe45d0ad4138fc21bd29dc", + "adb68e7a8bea4b77b960e412c67a6286", + "39ec099d38f04f4e8ea334d0c5335e2f", + "044bf34d53024427801e24fbca808dc1", + "e3d2839112ff4b7f9ab5bc04900ff522", + "f620e7774fa04ed0a88d2f78d2243906", + "7a12c0d7b32b445f978809c9aee2c62d", + "5a230441445746d59ea8a10a4d5bb467" + ] + }, + "id": "XEE1FrSIh-EF", + "outputId": "8cd19745-2f6f-41e0-96dd-5a2f72ac3a63" + }, + "outputs": [], + "source": [ + "HF_USER = \"lisekarimi\" # your HF name here!\n", + "\n", + "DATASET_NAME = f\"{HF_USER}/pricer-data\"\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']\n", + "split_ratio = 0.1 # 10% for validation\n", + "\n", + "##############################################################################\n", + "# Optional: limit training dataset to TRAIN_SIZE for testing/debugging\n", + "# Comment the two lines below to use the full dataset\n", + "TRAIN_SIZE = 20000\n", + "train = train.select(range(TRAIN_SIZE))\n", + "##############################################################################\n", + "\n", + "total_size = len(train)\n", + "val_size = int(total_size * split_ratio)\n", + "\n", + "val_data = 
train.select(range(val_size))\n", + "train_data = train.select(range(val_size, total_size))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lUPNqb2Bse21", + "outputId": "a3d09c8f-ce5a-46b0-e1b0-b4471a659f69" + }, + "outputs": [], + "source": [ + "print(f\"Train data size : {len(train_data)}\")\n", + "print(f\"Validation data size: {len(val_data)}\")\n", + "print(f\"Test data size : {len(test)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wixbM-VeVfsR" + }, + "source": [ + "## 🛠️ Hugging Face Configuration" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "OixVUG06VmZk", + "outputId": "3cb523e0-fd03-4a18-913b-c22fa90e3bdd" + }, + "outputs": [], + "source": [ + "PROJECT_NAME = \"llama3-pricer\"\n", + "\n", + "# Run name for saving the model in the hub\n", + "\n", + "RUN_NAME = f\"{datetime.now():%Y-%m-%d_%H.%M.%S}-size{total_size}\"\n", + "PROJECT_RUN_NAME = f\"{PROJECT_NAME}-{RUN_NAME}\"\n", + "HUB_MODEL_NAME = f\"{HF_USER}/{PROJECT_RUN_NAME}\"\n", + "HUB_MODEL_NAME" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1-t1nGgnVTU4" + }, + "source": [ + "## 🛠️ wandb Configuration" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load from Colab's secure storage\n", + "wandb_api_key = userdata.get('WANDB_API_KEY')\n", + "\n", + "# Load from environment variables (.env file) if running Locally (GPU setup)\n", + "# wandb_api_key = os.getenv('WANDB_API_KEY')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"WANDB_API_KEY\"] = wandb_api_key\n", + "wandb.login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 156 + }, + "id": "yJNOv3cVvJ68", + "outputId": "0c03623e-6887-49e3-8989-bbe45dfc5d35" + }, + "outputs": [], + "source": [ + "# Configure Weights & Biases to record against our project\n", + "\n", + "LOG_TO_WANDB = True\n", + "\n", + "os.environ[\"WANDB_PROJECT\"] = PROJECT_NAME\n", + "os.environ[\"WANDB_LOG_MODEL\"] = \"checkpoint\" if LOG_TO_WANDB else \"end\"\n", + "os.environ[\"WANDB_WATCH\"] = \"gradients\"\n", + "\n", + "if LOG_TO_WANDB:\n", + " wandb.init(project=PROJECT_NAME, name=RUN_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qJWQ0a3wZ0Bw" + }, + "source": [ + "## 📥 Load the Tokenizer and Model" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 418, + "referenced_widgets": [ + "1b88f6d4010f4451a58abe2c46b74f62", + "139758ba39964f49b65eb67182eef68e", + "9c138d12dcb644fe9b72bd9eb5d26637", + "3bf8626162904a15932480ddbcea0ebd", + "a919a41b53604ccd91331d3f713e1310", + "5b8cdfe01f9a4c248e3de30442411ad4", + "e14d38a4c3e04d68ac30d475b0db1a73", + "dadfd3c2a521420890092be265c0aa50", + "761e88b179104dbbb6455ba81bd1f833", + "11f5b4df0c7344ba9e188f4eca82886f", + "125aa3f0dbd744eb82f8e4de94199736", + "6ca21586e6fc4a608adedba7889eadb5", + "023eb92e8a2b4323bfd12582e3c23962", + "c7c76b9845174e9687107595df27c050", + "78d4a28e03db4775b6e8e071c0b02d5d", + "8483c625762c49679877a37ab0ddcef9", + "1df5f6fe2fc04e60bfcb1f78689824ba", + "add10c416e334928af303d51dfd745c6", + "5e9e9dac85014292b94d347cc4bad3fe", + "d665aa6480624ab697f4e426b51d59de", + "03cce0d3f3a443fc808915b101576e4b", + "f15714023f234c39863b34d1a3721a8e", + "8f7a48d803eb4d2182c9da07af743ac7", + "74892e7b343d410bbbef60c64a823a9a", + "d6a70560831144e39dc9762d397f4c90", + "9b969f7fbcdc491cab71aac42761cd2a", + "d31f9443d1c646309c7a5e1ec39ffc0e", + "0f5a81846ab143bebf6ec422cda3f145", + "f0b05f3f7f37414c9d09470c94e304d7", 
+ "d18784692c9c4ca99e277e6ed51e2bf1", + "f58addfac7c3438a90ebf10c88348d56", + "451deac2eeec45598590579340be0d4b", + "848e0651caf34ef288cca451e3d11274", + "5adf041222f843429c3a9f1b99becfed", + "a4764f36570b4752a1ec4392d2f0146c", + "511a4c6a898346acac9d98fd3a7cdf2c", + "26da7435a2614201a9e5b8087749f0e0", + "6054fa015ae44659beb7473c084c7b5b", + "3b9fc447a9ae4506a1edaf0fa449d9d5", + "6acef8f1820545ef90b22d90ac80427d", + "2a5cbad0b8fd45dc9ee25715b1015aef", + "86a9428f39be4d65a1e922bd9afb3800", + "96d919a1a7f14e91b8e6c91d855e36d5", + "82d7484aa2774015b7ea18d933afa9b6", + "b9d2d4f2c44a4d7cad2b3803c7f6e7be", + "9f3a176a6ae6426a8c1567a835da8680", + "006763d2301f4205a588adf5c19876a0", + "b44eb6596c3441bbaab288030f953a04", + "bf91666a0c054c79acb03d2e1bb38c37", + "f0185f1b4b23445c920a873eb63a9372", + "8e1ac15b677d4c21ad42ea1dda68fe05", + "87746d8d6d3d413ebb46b4e12fb74cc8", + "bb5ea1e92c434a46838f943648de87bd", + "1abcfcba332b40eb901d1331ed84f9bd", + "52fa5fcc629742619fa3105f73d90767", + "1bcc2d5771034c2dbc372031e83a2384", + "221cfaa2a5db4cf1ac399363c3589025", + "793f9bdc92a545519dd3279023e4ab50", + "55e25f5cc12f44f3a39fae501fccd060", + "59463b5e6286483394dedb602991ac95", + "fc95344ea44d40f28702360542afcff7", + "ffb3af537d6c41548ad88027505b04d6", + "6afcf0f6131d4dddbeda796e9c0c5bc5", + "93f65b3bc071453f86fe8f0f6c17d8fd", + "2ac9926ee4644232b43d84cfa95c584d", + "0c5a7738132b4f0f8b4810333b37c588", + "99d41ffa37134be9a57fe5e50a59b67d", + "50e71304ab4f42c29f1994fed9b595b8", + "76b4b0d63e524eb783429169a25be74e", + "441cfadbe4b446f4b61391b7be4d2865", + "6751f0c35b634d7c9b06c4e41f9ff851", + "6a5dc276bbf64bf9b5a99751068ee228", + "b3ac6055014642a285435f877d5651f5", + "e9137600b29c4ecaad4ef8bca5fd5f91", + "634afb9c1b8c4e29b3ec7b76a1108ae4", + "6be0ac91035548fbbe778e3d7fd58e7e", + "e8e9d5c979ac4afba526e38b6d0851be", + "a4ae8ca9c0e7478fbad3b9ed67bc21a2", + "faf3a64e316a43ddbac8ba14573c4eb4", + "a395885e39434f9f98246d0fb1c94c8f", + "d13552c90ead4804a4d5a21121f25536", + 
"c25b94002c2246a9aa7f6ed1e4a22cfa", + "e3892cf602cb4a49948f26cae1e7644c", + "bc290a324a7147c5b6a722acb41ed05a", + "2b556f5aa6324958ac6fe36bddf17909", + "67c6a0534b3a4345b9c11af1bffdfbf0", + "d767921bb23c485396282cb79a4d1836", + "d598468ad8f94146976f70d873f0b56d", + "b547888cd5494b21911b7d457ab6fbac", + "28362e43274848109c2624e5668942b0", + "7a27fc65bc0b44ce9bd959f4be13514d", + "73bc97e6d9cc4ccd8d134092ce970026", + "c042bf08ab23410098e6d16e837d19ce", + "d2930ad2c08748d0883bb77c68acf940", + "c2a1291730874e8e94232c0d51575f81", + "cb92871b11a0410eb295cc323e5872a7", + "150a5ce5d8124b0eb9e44d8715b8b1ab", + "7a6f05ad1f2e483dbcdca102c66530b0", + "626a29aee42e4e6d8c18d8ea5889734a", + "c549ca0548d04a7d8749a0842c4aa62b", + "958c0ff0f47f4c0fa4e2085f5243d84f", + "a8171febcac94a4b902ff737592f3f47", + "22630cdb7d6f4975bc31cc189987573d", + "2f8a9ccee6ea4cdd8c8c225575cae0ce", + "e40f81c5c4334accbca947964146d238", + "d6849da8e89546469188dc047c66ea25", + "8a67d8a2ac0a4fd7a41aa5c890049525", + "5bf18445be0e46e087cbcd377ccfffbe", + "72b2020c9479471681ce0f42898cfe1c", + "c114fd62eb4b4fdca94654668c8f2374", + "401580df26fc40abb2b774c3d9684921", + "e756b825b211476994a69fb65f4bbf7c", + "b2c26cf10e5a4d4fa8961f5c9cca18ce", + "c288256c73dd44d08916db4e9cf989f0", + "250a72e9650845d2b274bc3c157439f8", + "94281c7e5be049c1a9f3dfa082805133", + "f004f9f743ae4229aa90c92abba6ded6", + "bd8ca5b8aaed4809a93f553d5cb4a887", + "4cec4c2d73de4d52b2143082645536ac", + "893b96616a0e47bfaa0434e10eca1341", + "74e7d88dd4894894ac2c16fdfd29233b", + "9e1f1e4288df407fa03415664dc361d5", + "81dc3f390b9a49f8b1be5c43580b070d", + "917a225a9bb74f8ab034dcdcee3c7247", + "bc6c698857ce4f8eabc1571ba0ff0edf", + "e9ae1c247ae5409f9da4db84ce71a6e3", + "55071660223e4022a6a7836572077c0c", + "8364e661011743af9fd40dabc5a7dfe4", + "ac65442e0d5e43e2998d7c700573228a", + "666f3434ae8a495f8ada8fedb50b7051", + "1977e9f07f104faead7dfcfa8aaed6f2", + "ebe2257c07f345fea72f162542a45142" + ] + }, + "id": "R_O04fKxMMT-", + "outputId": 
"29aa1cf7-2a2e-492e-adc9-cd0a5bfb123e" + }, + "outputs": [], + "source": [ + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "\n", + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True, # Reduce the precision to 4 bits\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=quant_config,\n", + " device_map=\"auto\",\n", + ")\n", + "base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n", + "\n", + "print(f\"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SrCE2Le7RBRj" + }, + "source": [ + "## ⚙️ Fine-tune our LLaMA 3 8B (4-bit quantized) model with QLoRA\n", + "- 1. Prepare the Data with a Data Collator\n", + "- 2. Define the QLoRA Configuration (LoraConfig)\n", + "- 3. Set the Training Parameters (SFTConfig)\n", + "- 4. Initialize the Fine-Tuning Trainer (SFTTrainer)\n", + "- 5. Run Fine-Tuning and Push to Hub" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9BYO0If4uWys" + }, + "source": [ + "### 🔄 1. Prepare the Data with a Data Collator\n", + "\n", + "We only want the model to learn the price, not the product description. Everything before \"Price is $\" is context, not training target. HuggingFace’s DataCollatorForCompletionOnlyLM handles this masking automatically:\n", + "\n", + "1. Tokenizes the response_template (\"Price is $\")\n", + "2. Finds its token position in each input\n", + "3. Masks all tokens before it (context)\n", + "4. 
Trains the model only on tokens after it (the price)\n", + "\n", + "\n", + "Example:\n", + "\n", + "Input: \"Product: Red T-shirt. Price is $12.99\"\n", + "\n", + "Masked: \"Product: Red T-shirt. Price is $\" → masked (no loss)\n", + "\n", + "\"12.99\" → not masked (model is trained to predict this)\n", + "\n", + "So the model learns to generate 12.99 given the context, but isn’t trained to repeat or memorize the description." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2omVEaPIVJZa" + }, + "outputs": [], + "source": [ + "response_template = \"Price is $\"\n", + "collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4DaOeBhyy9eS" + }, + "source": [ + "### 🧠 2. Define the QLoRA Configuration (LoraConfig)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0HKuVS_XR3cw" + }, + "outputs": [], + "source": [ + "LORA_R = 32\n", + "LORA_ALPHA = 64\n", + "TARGET_MODULES = [\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"]\n", + "LORA_DROPOUT = 0.1\n", + "\n", + "lora_parameters = LoraConfig(\n", + " r=LORA_R,\n", + " lora_alpha=LORA_ALPHA,\n", + " target_modules=TARGET_MODULES,\n", + " lora_dropout=LORA_DROPOUT,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\", # Specifies we're doing causal language modeling\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uLfFsfNQSBAm" + }, + "source": [ + "### ⚙️ 3. 
Set the Training Parameters (SFTConfig)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7PKXdhPXSJot" + }, + "outputs": [], + "source": [ + "# 📦 Training Setup:\n", + "EPOCHS = 1\n", + "BATCH_SIZE = 16 # A100 GPU can go up to 16\n", + "GRADIENT_ACCUMULATION_STEPS = 2\n", + "MAX_SEQUENCE_LENGTH = 182 # Max token length per input\n", + "\n", + "# ⚙️ Optimization:\n", + "LEARNING_RATE = 1e-4\n", + "LR_SCHEDULER_TYPE = 'cosine'\n", + "WARMUP_RATIO = 0.03\n", + "OPTIMIZER = \"paged_adamw_32bit\"\n", + "\n", + "# 💾 Checkpointing & Logging:\n", + "SAVE_STEPS = 200 # Checkpoint\n", + "STEPS = 20 # Log every 20 steps\n", + "save_total_limit = 10 # Keep latest 10 only\n", + "\n", + "\n", + "LOG_TO_WANDB = True\n", + "\n", + "HUB_MODEL_NAME = f\"{HF_USER}/{PROJECT_RUN_NAME}\"\n", + "\n", + "train_parameters = SFTConfig(\n", + " # Output & Run\n", + " output_dir=PROJECT_RUN_NAME,\n", + " run_name=RUN_NAME,\n", + " dataset_text_field=\"text\",\n", + " max_seq_length=MAX_SEQUENCE_LENGTH,\n", + "\n", + " # Training\n", + " num_train_epochs=EPOCHS,\n", + " per_device_train_batch_size=BATCH_SIZE,\n", + " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n", + " max_steps=-1,\n", + " group_by_length=True,\n", + "\n", + " # Evaluation\n", + " eval_strategy=\"steps\",\n", + " eval_steps=STEPS,\n", + " per_device_eval_batch_size=1,\n", + "\n", + " # Optimization\n", + " learning_rate=LEARNING_RATE,\n", + " lr_scheduler_type=LR_SCHEDULER_TYPE,\n", + " warmup_ratio=WARMUP_RATIO,\n", + " optim=OPTIMIZER,\n", + " weight_decay=0.001,\n", + " max_grad_norm=0.3,\n", + "\n", + " # Precision\n", + " fp16=False,\n", + " bf16=True,\n", + "\n", + " # Logging & Saving\n", + " logging_steps=STEPS, # See loss after each {STEP} batches\n", + " save_strategy=\"steps\",\n", + " save_steps=SAVE_STEPS, # Model Checkpointed locally\n", + " save_total_limit=save_total_limit,\n", + " report_to=\"wandb\" if LOG_TO_WANDB else None,\n", + "\n", + 
" # Hub\n", + " push_to_hub=True,\n", + " hub_strategy=\"end\", # Only push once, at the end\n", + " load_best_model_at_end=True, # Loads the best eval_loss checkpoint\n", + " metric_for_best_model=\"eval_loss\", # Monitors eval_loss\n", + " greater_is_better=False, # Lower eval_loss = better model\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1q-a3LHDSoxQ" + }, + "source": [ + "### 🧩 4. Initialize the Fine-Tuning Trainer (SFTTrainer)\n", + "Combining everything" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 290, + "referenced_widgets": [ + "6753caf741414a4c8fa309978253c8cd", + "aeade430d57b4338910ad0c3645fd06a", + "eb7081b71cc14aff9b99dba8f9368def", + "8eb16171df804d06a02351f74bb28dc4", + "9d60a205ebda49ca88220cc4eec716ca", + "d8ff973b90374423b4b5e17a1937111c", + "4bf3bf107f2c4e28a58387c96916e97f", + "d66cb8c1829c439095f4691fa32d7b6e", + "567c8321685045c5a873b3b1edecdc96", + "96ff596facb94acab611201b4adac13f", + "de65507ce09a4ef4ad8f28d46d335acc", + "e40fe92fe9094a58b53f0eeb97d3d629", + "592615cc81624de5a9934f5671d6c188", + "fadf75d91df54f49acef3f178ea53ce3", + "5ccca8ab6cb94a88bb27bd482f7948a9", + "d74dcc2ef9b8442d9ae99db2a79e0c48", + "580ebfa370d34426933e8c7389872e2b", + "1187f05dc99641e9a68d9cf49216c370", + "7deffbba68ba4f018374bd6bec62dd18", + "d24cdc40a6a34d6eb0efbfde17505d6f", + "31d44a308b4b4557934ec887e0b6a817", + "76112ce6fdc4496dba783451efa28cfd", + "15a85e4a77484c9392b2e5cb8767b336", + "4524d775b9034a1f890673a9c005d123", + "5ab6a6b427f84ec685ac52f6ff0d63b5", + "427ee9e90a844313989f623aba124498", + "6d2b7c059e6b42afa955fe01bf38011d", + "5d821ed8ffe14927be799c4d31043a82", + "12f9fab59e9849dcb7b3b17c5674580f", + "dd4a2876db37476fa438e8758c855393", + "f115f97428764c53ac780131fd75bd17", + "1a1e0e562a844ed098e97ce8a62695ee", + "0a7ae7cc902243a5996f730f0fe05cdb", + "07205ea24c3f4959bf9ebd393f5c921d", + 
"723bb8342ac84eedabd91e3eef178967", + "28714d0cf3d84a48975c8ad31e29691d", + "dd1d90d76d914839a1dad1cddab2c09f", + "e2d55edf98784523bcbeaad0cc2be494", + "d00ecfa9dc44428b989ec1a9deb27eae", + "ba2717985bc342e9827f8901ef655b00", + "6669dc8f20e3461f93c95cef7a90b201", + "29cb36c1943c4e1b9898534aaf32bd37", + "14a1449c13a14afda16bc7c05b7fd840", + "259d315eb4584c699b1c738d411eab7e", + "a4bb13eb7cee4f87b0e3e1a3a1be18e7", + "14d8a699a92044cda33802d96aaa41a2", + "d345350fd5ad4a028fbbc45cfc9f6db3", + "6953210353f840d59457fc54f4f8b829", + "d6cd9e1196f04ecbba83dc0b446b2c65", + "9e380ef863204da5863c9b6e7a2c8340", + "1d1bb803831d46309619f6a0c51c2eeb", + "6a50aaf7ad304a5aa3f29113121e8fe0", + "7a573a39c2b245f5a84626d951584f67", + "a57e66367d4245f6bcd4ad0463535583", + "d6f3327d39a34ec5a44d976f239a61ce", + "8f450df9f161409a8102c1f0b63edad8", + "95d932d12cb8442da17adb8e9782c40c", + "41c5f295b45f4828a9327b699b85ca01", + "9e4f3fd6bf7749f88ccd7ba65dd9446f", + "a8f8cb0d9fb14f30a537977f3d51a2c4", + "4e9e4ed0f2db4d7ba5a5bb0d00676a0c", + "1fe2bab9c9aa4de48e6e2512f9a7d0a1", + "d93ac5affccf404fa3916e7f3dd62943", + "92346fc65f48493d80198ac6d7adf4d8", + "647bfb2a24cc44a0adaf69ced8e99213", + "5c96424cff314aa484e4bc905bcbd761", + "cec2fcfb30194d5ab8c0a3868bad3598", + "35df7031c4964cef9c53bba6eabbe91d", + "e15c772e14264c9889e6dae34015e04b", + "e85b65cb497c48c2b844ae3e5d9efc60", + "52c8495d46ca4a3c8c6694a700d05e95", + "3db6d8a5ce2a40daaae6714807a27997", + "051d74df7ef1468aa968cac5792e7b00", + "75838a7c887545ff9fbbf5887a1336bc", + "59f698c1829148ac90edda008d5c6f69", + "35921436c69643aab792bd1333c749ef", + "2dd51cc6033746e1a8def460e5e51ff5", + "a8a3e5973ee5441087d10dfb17bfa1d6", + "64c3b3c02e844df6bfd3acf1ee23d765", + "83016eccdd7f4dedab9d3ea6e6852977", + "9d4c5a62214f4649b77365349ae4ac88", + "07cb9756d1814a7ba7fb49cccb2763cb", + "492454ad524742bd8bb3f5c3d5b37feb", + "e98053f6b7f045da812088d1e76d3a31", + "f2aeb3ae99cc4b7ca97fb959df1150ad", + "f92e18b6ab0147b1b428724f5155ca61", + 
"14356b2447e349ee8478478eb231fa81", + "f244a7e331d941f5a99712dcbc5550ea" + ] + }, + "id": "fCwmDmkSATvj", + "outputId": "2b4adc75-e0db-4e0b-c90b-9f9ff2dfd3c6" + }, + "outputs": [], + "source": [ + "# The latest version of trl is showing a warning about labels - please ignore this warning\n", + "fine_tuning = SFTTrainer(\n", + " model=base_model,\n", + " train_dataset=train_data,\n", + " eval_dataset=val_data,\n", + " peft_config=lora_parameters, # QLoRA config\n", + " args=train_parameters, # SFTConfig\n", + " data_collator=collator,\n", + " callbacks=[EarlyStoppingCallback(early_stopping_patience=5)] # Early stop if no val improvement for 5 steps\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vHz6JA5_XJ07" + }, + "source": [ + "### 🚀 5. Run Fine-Tuning and Push to Hub" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "GfvAxnXPvB7w", + "outputId": "d351d89a-b3d7-4e2b-fee2-5ba2e929837e" + }, + "outputs": [], + "source": [ + "fine_tuning.train()\n", + "print(f\"✅ Best model pushed to HF Hub: {HUB_MODEL_NAME}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_train_eval_loss_steps.png?raw=true)\n", + "\n", + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_train_eval_loss_wandb.png?raw=true)\n", + "\n", + "This chart shows training loss vs evaluation loss over steps during fine-tuning of Llama 31 8B 4-Bit FT (20K Samples).\n", + "\n", + "- Blue line (train/loss): Decreasing overall, with some noise. Final value: 1.8596.\n", + "- Orange line (eval/loss): Smoother and consistently lower than training loss. 
Final value: 1.8103.\n", + "\n", + "- No overfitting: Eval loss < train loss throughout — a good sign.\n", + "- Stable convergence: Both curves flatten around step 500, suggesting the model is reaching training stability.\n", + "- Final eval loss is low, indicating decent generalization to unseen data.\n", + "\n", + "This fine-tuning run looks healthy. We can likely push further with more data, e.g. the full 400K-sample run." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 938 + }, + "id": "32vvrYRVAUNg", + "outputId": "bb4ab0f6-c390-48f3-a71c-2d259bb0ec0b" + }, + "outputs": [], + "source": [ + "if LOG_TO_WANDB:\n", + " wandb.finish()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://github.com/lisek75/nlp_llms_notebook/blob/main/assets/09_run_summary_qlora_llama.png?raw=true)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyKZ0r38IfT3" + }, + "source": [ + "Now that our best model is pushed to Hugging Face, let’s put it to the test.\n", + "\n", + "🔜 See you in the [next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part7_eval_llama_qlora.ipynb)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "provenance": [] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb b/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb new file mode 100644 index 0000000..bfe78d1 
--- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part7_eval_llama_qlora.ipynb @@ -0,0 +1,739 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GHsssBgWM_l0" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 7)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- ➡️ Evaluating Fine-Tuned LLaMA\n", + "- Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🧪 Part 7: Evaluating the Fine-Tuned LLaMA 3.1 8B (Quantized)\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required - use Google Colab\n", + "- 🛠️ Requirements: 🔑 HF Token\n", + "- Tasks:\n", + " - Load the tokenizer and the quantized base model\n", + " - Load the PEFT adapter for the fine-tuned weights\n", + " - Run evaluation — the moment of truth!\n", + "\n", + "🔔 **Reminder:** \n", + "As mentioned in Part 6, I fine-tuned the model on only 20K samples. 
\n", + "In this notebook, we’ll evaluate both this model and the full 400K-sample version fine-tuned by our instructor.\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MDyR63OTNUJ6" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q datasets transformers torch peft bitsandbytes matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-yikV8pRBer9" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import math\n", + "import torch\n", + "from huggingface_hub import login\n", + "import torch.nn.functional as F\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n", + "from datasets import load_dataset\n", + "from peft import PeftModel\n", + "import matplotlib.pyplot as plt\n", + "from google.colab import userdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WyFPZeMcM88v" + }, + "outputs": [], + "source": [ + "# Google Colab User Data\n", + "# Ensure you have set the following in your Google Colab environment:\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "30lzJXBH7BcK" + }, + "outputs": [], + "source": [ + "# Helper class for evaluating model predictions\n", + "\n", + "GREEN = \"\\033[92m\"\n", + "YELLOW = \"\\033[93m\"\n", + "RED = \"\\033[91m\"\n", + "RESET = \"\\033[0m\"\n", + "COLOR_MAP = {\"red\":RED, \"orange\": YELLOW, \"green\": GREEN}\n", + "\n", + "class Tester:\n", + "\n", + " def __init__(self, predictor, data, title=None, size=250):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or predictor.__name__.replace(\"_\", 
\" \").title()\n", + " self.size = size\n", + " self.guesses = []\n", + " self.truths = []\n", + " self.errors = []\n", + " self.sles = []\n", + " self.colors = []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error<40 or error/truth < 0.2:\n", + " return \"green\"\n", + " elif error<80 or error/truth < 0.4:\n", + " return \"orange\"\n", + " else:\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " datapoint = self.data[i]\n", + " guess = self.predictor(datapoint[\"text\"])\n", + " truth = datapoint[\"price\"]\n", + " error = abs(guess - truth)\n", + " log_error = math.log(truth+1) - math.log(guess+1)\n", + " sle = log_error ** 2\n", + " color = self.color_for(error, truth)\n", + " # title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " # print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n", + "\n", + " def chart(self, title):\n", + " # max_error = max(self.errors)\n", + " plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Model Estimate')\n", + " plt.xlim(0, max_val)\n", + " plt.ylim(0, max_val)\n", + " plt.title(title)\n", + "\n", + " # Add color legend\n", + " from matplotlib.lines import Line2D\n", + " legend_elements = [\n", + " Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8),\n", + " Line2D([0], [0], marker='o', color='w', label='High error 
(red)', markerfacecolor='red', markersize=8)\n", + " ]\n", + " plt.legend(handles=legend_elements, loc='upper right')\n", + "\n", + " plt.show()\n", + "\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for color in self.colors if color==\"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n", + " self.chart(title)\n", + "\n", + " def run(self):\n", + " self.error = 0\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, function, data):\n", + " cls(function, data).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 📥 Load Dataset" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "61f42f612e98467684716cc7421c7554", + "a7e864c2ae21482e8bcdbc42a5a65309", + "63405c5e47da4652b052ee6099ead31e", + "0864a38b1c494308a07defced89f4fe3", + "8f089946a97d4becb3ff06b7a65595a2", + "42b865ac9e4f4ecaa475c4d69929e401", + "3478290afe1d48268c7c07206c212eda", + "f21c0db9205f4c40a2f9ea1ddd66b59e", + "4604f38122454bc1b1826311a326eb12", + "6e2b95e33cab4fe9b9f555195b634fac", + "b8f0f357a61c4502962f385291c3bac8", + "fa49b7e56b054faca67334e08bbf622c", + "243d84401ba24360a42c2636d7984772", + "bbcf01edcbcd425b9ca1e61e80f6df4f", + "17b41698c33044c7942e66e63c5c2d2d", + "14dfccde2f6a47679cea42ce965b6ef2", + "6a1570c8980b4d5ebac78348f79c4f1b", + "44f1922676f3417fb7baccd92bf53cea", + 
"176b023546bc4053a4d484205d7ab200", + "b02018254c4b4fb680e382974380c331", + "766aba35ebf54996990e075e4f692f96", + "24ceffd3b8c64e5f983e52d743ebef8d", + "5b9076b6c05a4454a7233302114b9d8c", + "4bfbd393271844de825a53c7d639fa60", + "3313091548bf414fabf84f5aa2c85d14", + "f98c7fe4ad6d4649a7a104f973992be0", + "fd1eb06d0aa64ba59ae9bb214f2c94ed", + "24237203b2c44709b20ca84b95387849", + "7910e6a4881a43638c4e91dd0f024092", + "f22dad57ee324ca8b927f9a3b8cc6edc", + "20a702b1ccbe499eabf70af974561417", + "48f72254ce6f408c94bf56a3919c032e", + "6bf00cd26256489fb209b8b51ca9fb0e", + "da3c453facaf41b6bc89d311d9f1ce74", + "78487c1a13e84e7bb35a72a07ad9b681", + "3866fe39fcc34120a0b4c4b36c8eaa6c", + "54de8e445909429f9d7ca9ad02e8f190", + "eeda8994cb8d46cc9d5c2212907ab869", + "b670675ee9bc4689a34f997d0da13b82", + "56727a21bb4648fe8ae46d3a61b39f4a", + "da89c856fbf746b496d37cbef92305b9", + "2f4ba348ef7246af8b1cd04352bcbd1d", + "0d86b4a93411494eb8e725440e393cff", + "203c4888674c46bba1033639ad4286a2", + "005dac04aacb4955ae079d36bfc4cd19", + "68ff796bdee44aa380324374ae38fd25", + "411691dce3f1457cb3ee9e8ad652d61d", + "f0fc209cb9e74d0ca3c0c9b14b1450e0", + "6e2155c3ad3243508dff34919eecd0a2", + "68891d88fe7e417abbd508d2089e7960", + "8e1ab77817bc4ec2835b195a0beb1096", + "c638e3a09f6b4caaa078e242b010744e", + "ee9abd78adb54984868ebee19f638e25", + "8280e432938b4e9794c95e47bb9c02fa", + "abdd2ff8028b432091434805f81c455c" + ] + }, + "id": "cvXVoJH8LS6u", + "outputId": "6308b124-a922-4e82-fb6a-5933d3c324e0" + }, + "outputs": [], + "source": [ + "DATASET_NAME = \"lisekarimi/pricer-data\"\n", + "dataset = load_dataset(DATASET_NAME)\n", + "train = dataset['train']\n", + "test = dataset['test']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xb86e__Wc7j_", + "outputId": "8b699099-7414-4663-fab1-d069d3ec3d35" + }, + "outputs": [], + "source": [ + "test[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "qJWQ0a3wZ0Bw" + }, + "source": [ + "## 📥 Load Tokenizer and Model\n", + "The fine-tuned model (PeftModel) only holds the LoRA adapters, so it requires the base model to apply them correctly." + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 401, + "referenced_widgets": [ + "aee2cb6b13d64f1dab9f8190a274bbc0", + "547a2807263e4295af11da5a43ccf5b7", + "00b57ab6c0c44e39ad6fa27b7e5a085b", + "d51c826dc6d749b38ce7e5fdfc730086", + "f276602665c148999240ef916aa8a9c8", + "9d83d7056aed43a59d82955bdb8f272d", + "7a71aedc0f49430ba7c71040c5fa2529", + "108880a9a7bb4a73837889ad2a25fd77", + "0163275024a041e2bc9fba947c371269", + "555a494cbcda41e79ee4584a8122774b", + "5dfbe2af8afc43c691c34c52a47c9790", + "25edb5ab02c3402998b75cffc13d0a55", + "9a4f0361323540aa8428054a0d98ecb3", + "cf149d1eceae43a9808e142fbfe5d4ff", + "de86c9338690424fa0052e5b055cea88", + "2acb3368945a48aeaf9fbc6d22e9238d", + "4c4c4b1507814037bcea0519ec43ba26", + "6d37385e79904b7ca267ad165774f962", + "b14a5f0f71094aa98403edd429cb882e", + "31b28b6183c644f9b5601208a1f72499", + "d3cddc62e0fb4256bf4c74f6a59e686b", + "82cb2192839e451292b27a186daaa7c1", + "2e038c429eed4abdae8d27a7226d7298", + "364c4658aba64512a1f50cdad9cc12f8", + "fde7b1ab1e224fec8e9b761e703b53dd", + "ad5db9c88ce64f73992d2e274ca1206a", + "0e7ada829b22485ca7a628d2c464f3f1", + "ec4f7d2076db4f6a856ab0d5e8edffbd", + "3f00114026a4417db1b142e5bcb7a695", + "e4e9cf32b99848baa6a587fb235ce6b5", + "a109b5ee80574e40a14fa1e186f4f9f4", + "af569da703694c27aa9ca2ddce6c4923", + "886bb94abf2c437eb8505222c4336e85", + "f668156d681e47f39e553f127a44261d", + "9bb3d0deaac6439e9ad67c2bc0565ff4", + "762b36fde5ac4a2982152f3babfa3ed9", + "141911ee360d42ab8dd3b7fa3563bbf0", + "340eae69eeaf4e458e6d8134018f4ad4", + "3226e3a8c4564f6fbd6ffb3eeb7b45e7", + "6ed52680f866470da1e8d4a48b6e42fb", + "6d8a206edb824c5eb06c803e8cab14de", + "86fd4472a7a84940a54f24104689a74d", + 
"916c0e20af5e4b78a5e86532b0c9a3e8", + "62dd475c101e4859a48ee57a272f71bc", + "a8b7185a12c94adca0e63563d7df3ce4", + "47d57186838d466fb91b6666df85d1b4", + "9d37814d818c466c90892bf1f6e9a190", + "b5fdba30791649a792d192a131890a4e", + "789fe6f5489345c6a8b6a889d20e0ca0", + "5ce12a0983bb49f1a871598a6b9a0a13", + "d9eb89d218a44f21bb4447040e5c8925", + "b04aaa7931e74297a55bca3ebf4ded1d", + "837708f48ded4d78b7ad2e0dc6464e9c", + "32236e0d0b3e46e4b2c26b7ccb63c89e", + "499acde0cedf4ea1a90415f98660aaa5", + "840d3e7824944889ac2091b35f0c17c0", + "08f2fae4688b45729d8f5bf53837e56d", + "133bb5607eb0457888b1fe4e8d3fab3e", + "46bfe5feb9074050b556d804a544140d", + "4c3b0c2d04d24ec6abe8acbadb420712", + "eda1fcca6987495b87cf2206f93a0ecb", + "00b803cf92754db1bbea8ca909e5ccef", + "17e17b928555462abfbfa4caf7992427", + "35f90fa89e8842cdaa487b59da45b3e8", + "2887ef88074c4591b710688fa76329bf", + "0a0c5f00b3cc477e8b7e06550fc6f1cb", + "3b079fe81b7b44d796c531bec1754637", + "e82f8ac6e8eb4ed6a6743e10b8b99904", + "1f7de1e2970c4c8fbfe1ab400297e1a7", + "7ea0d8782a1f4cca9a64b95fe47e8a2e", + "689b49d52b8f4efb94f80d76a0fefab3", + "2005939305c442f7bed3b83ea16e13b1", + "1a6f2631e29444818fdbd9a0de265367", + "6bfc89e091a5448d94d2ea559ce43a21", + "bfc12d40caf4481280888506dfa01505", + "a1fb82d5761843a49a0993ff937cb40d", + "4c9c567918ee478a817b51e2a204d915", + "305623f276ba45e5a57727d1829158e1", + "b2722e271f78405b9151804ffc522530", + "963435e51a7a4ce98510c0372cd05030", + "d394cfc6af384a39b87c72ac6a3788d9", + "2c621a7a90ed4bfd8b52cea9c79e11c1", + "59ac0bb5c046448fbf16a27d2c3205f8", + "7617f5670879416d9dbc2dabda76ef4d", + "b32d6d6ff5dd4ac4adfb063205111707", + "38f3a7159fc34d89bc18e4225473615d", + "2a2c386e432f429f86c303d71472b480", + "ece25eb325004ae48ec5ec00055dd845", + "68e2b37bbd9a44f8a6032526acbf9ea6", + "3af191957e3f453ba803a1c01d6969ae", + "29dba394a6664e0f8984bcb966ccf19b", + "d84373a3f97245ae94bfb666c7e93a17", + "9f917250ccbf4078a90fda1eec71c6f4", + "8171dd4382d24f0a83484fbf967fec03", + 
"6f97606a500548e980c6481d756c72eb", + "6d1054047d4645a69c272484fd9e0c04", + "7fd14d942d2246bf8df28eca28e13fb2", + "0dabd208524f426bb5c643791e736413", + "368dea7bbf144cf0a667493cb23bddab", + "d6b14f8e43754283ad96543c4c1ffee6", + "f78562ef15524795bb9be326dcaab502", + "b01c8091b96444f687a49c5c51b5faf7", + "baab647e635a46ababa58993965a8159", + "25d9a9b78d554f8fbe92d7e805640c3b", + "95726f4b9bc34434b9d00fcdfe2ff87e", + "a7b835a668ef40c986a6fd51e464d1f4", + "188cac6192fc4b91be3ca5b01bab1d91", + "3537ef715f3447388625ee606555bb85", + "322ca0ccce644c48a2a0f4b44a38776d", + "cc3726d026594cb6ac2d6bafb16562ac", + "f48cc4a0a5d041cf9391a99353ff46af", + "05134ca3a9954341951ff958ff30fe0a", + "3a6aa623f1dd41b8940a41b509fa7500", + "fd58111bb44347b8bdcb984a0e86f9b7", + "c16cfb96177640a991c5509e652c85b9", + "adc0ffacba0846fabd76ed7955397077", + "e074da8f28d84ec891f22e30b86fb954", + "0b53df078f4a4a259b677ccccbdf46cd", + "954d5fa3b18a49589717cfc31fb58779", + "af0beb46b198458794c85803fe5af47f", + "c7322d41ae4c4068880521a136e923b4", + "391d834aa8734d7b9a97c03cab5e1e7d", + "5d779fc6bb1244449a68cf62dfd15698", + "197ca7f2357a4a2c89f5f3da3844c606", + "df4d22e6876b4c0082a7ace3281ff4e5", + "28d44cfae7de4b62be11020d9015f92c", + "3e8d7274ee3a4dfbbdd44ea0b2cd61b6", + "fa768ce193b94a4882a1e796e69cffea", + "c37a4882e4474f8690c4b479baf2d785", + "68a033bbcb4d4774bdb115e09d78365b", + "10b5e7970aa04bd6b3384aa645c48d92", + "f838b073dd254bb091a7db7175cd2ce8" + ] + }, + "id": "lAUAAcEC6ido", + "outputId": "b2983922-5036-4083-8cba-0cb3f51fbc51" + }, + "outputs": [], + "source": [ + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "\n", + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True, # Reduce the precision to 4 bits\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")\n", + "\n", + "# Load the Tokenizer and the Model\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, 
trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=quant_config,\n", + " device_map=\"auto\",\n", + ")\n", + "base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2RJ0G-WRJGMK" + }, + "source": [ + "## 🧪 Load and Evaluate the Fine-Tuned Model with PEFT Adapters" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 20K Sample Fine-Tuned Model" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "f0c0a20172294f77a0306801f8d76fb7", + "f68ee0810c2a4ac087ac6ece5279fb09", + "8aa12b380191454ebf55e8b42d0e0f2b", + "63f6cfa30a274ee3835671d8e39a85ef", + "0b980946a50d4248a4c63ef117fc2e8f", + "18283c6dee9447ddaca34ad267773e48", + "a7d10d9147df4adebf913e3023c2a3a4", + "5886ca455d4d4aefa617478f4f69a3ca", + "8c0e83bce4f74e7ba337fc9af5b977b8", + "00dbc32bdb0440c0bc3ba2cc6677b04c", + "243e6d8479ac4958a8d877e28f9b514a", + "10b7df1ecfab4e5cb146932fc4fb2c17", + "07c6fd1fe1ac442dbeb7037161841b78", + "88adf6ab3f3e476fa66ad22e9ff49aa8", + "fe522e9cee55448a9c13a5daaad5e7e7", + "4b1b9e5a67e54a3b90f2c113355e735a", + "5cdbdf93af9344ccabd7c3f236446541", + "c4af3ca6696d4fcd9b831d825456c7fa", + "525b1673c902412db32691056d49fd35", + "42de37b9a74143b4a851a178c484a706", + "f5f42d9201dc4fbaaa9c684fdb748d4a", + "10a0e99256a149a0a94ff652a4fd259a" + ] + }, + "id": "R_O04fKxMMT-", + "outputId": "06fc64f8-3407-460b-e093-0293e958915e" + }, + "outputs": [], + "source": [ + "# Load lisekarimi model (trained on 20K datapoints)\n", + "\n", + "FINETUNED_MODEL = \"lisekarimi/llama3-pricer-2025-04-08_18.44.04-size20000\"\n", + "fine_tuned_model = 
# Number of highest-probability next tokens blended into the price estimate.
top_K = 3


def improved_model_predict(prompt, device="cuda"):
    """Estimate a price as the probability-weighted mean of the top next tokens.

    Runs a single forward pass of ``fine_tuned_model`` on ``prompt``, takes the
    ``top_K`` most probable next tokens, keeps those that decode to a positive
    finite number, and returns their average weighted by the (renormalised)
    token probabilities.

    Only one token is inspected. As the original author notes, this would be
    more complex if Llama did not generate a single token for any 3-digit
    number.

    Args:
        prompt: Prompt text fed to the model.
        device: Torch device the encoded prompt and attention mask are put on.

    Returns:
        float: The weighted price, or ``0.0`` when none of the top tokens
        decodes to a usable number. Always a single float, never a tuple.
    """
    set_seed(42)  # Reproducibility: same prompt gives the same output every time
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    attention_mask = torch.ones(inputs.shape, device=device)

    with torch.no_grad():  # Do not track gradients during inference
        outputs = fine_tuned_model(inputs, attention_mask=attention_mask)
        next_token_logits = outputs.logits[:, -1, :].to("cpu")

    next_token_probs = F.softmax(next_token_logits, dim=-1)
    top_prob, top_token_id = next_token_probs.topk(top_K)

    prices, weights = [], []  # weights = corresponding probabilities

    for probability, token_id in zip(top_prob[0], top_token_id[0]):
        try:
            value = float(tokenizer.decode(token_id))
        except ValueError:
            # Token is not numeric text; it contributes nothing.
            continue

        # Reject zero, negatives, NaN and infinity so one odd token cannot
        # dominate or poison the weighted average.
        if 0 < value < float("inf"):
            prices.append(value)
            weights.append(probability)

    if not prices:
        # Same type as the success path (the original returned a 2-tuple here).
        return 0.0

    total = sum(weights)
    weighted_price = sum(
        price * weight / total for price, weight in zip(prices, weights)
    )
    return weighted_price.item()
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "t_GHfTwHXD5f", + "outputId": "056b0fc2-5632-4be8-ee24-b6bcefe14ab9" + }, + "outputs": [], + "source": [ + "improved_model_predict(test[0][\"text\"], device=\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 718 + }, + "id": "W_KcLvyt6kbb", + "outputId": "fba4200d-b911-467b-ab3c-17b78aa3b408" + }, + "outputs": [], + "source": [ + "Tester.test(improved_model_predict, test)" + ] + }, + { + "attachments": { + "0dcb25a7-83fa-4313-a94f-d3a56a0f07bc.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](attachment:0dcb25a7-83fa-4313-a94f-d3a56a0f07bc.png)" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 400K Sample Fine-Tuned Model" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "dd1b57e03f2641d3b702f2cc66942b8f", + "e1d477dccbfc44a8a6da301486180e82", + "c312a5111a284c3db88f22290869c023", + "ce118d8b8146497f9c7fdd3b38188e72", + "bc46c271637341bb82d6b87df22ab2af", + "602adf3242f54731938b68d3cf68465e", + "39fae5e74834421795729a259a046fb8", + "0618d8626e2e46cb9a17f86444de3c48", + "1cd43b5b2fe445088c84e19773ad861e", + "f70a29870ab34f34a1900b2df2bf177e", + "41a96c5e35a44b898b872c189f531d3a", + "0a524a73d5d6478db81256371bf2bc9b", + "275f6179dc624bceaa5d0639fe0b1b00", + "79c41b26746344bc9a220f2376360110", + "287a6430766c44e5a71dda1048fa2a2c", + "3bbe1a454a854747a96fe83e91d6cb3c", + "8a93759afe21414fb0d6684f0a591d60", + "a3d76b3ce67a495db861bac80cfc0864", + "8fc794262ed14fc785c8f06e734c57d4", + "7dc967baa0e7427bb66cf3e26849d508", + "2d7a6dbd15304347a37dbfb6e5ec7203", + "288393e05947444bad11034071015baf" + ] + }, + "id": 
"Kl6n_0sAbU0g", + "outputId": "2fb53efb-da22-4c29-a594-c2cf5a079388" + }, + "outputs": [], + "source": [ + "FINETUNED_MODEL = \"ed-donner/pricer-2024-09-13_13.04.39\"\n", + "REVISION = \"e8d637df551603dc86cd7a1598a8f44af4d7ae36\"\n", + "fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)\n", + "print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")\n", + "fine_tuned_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 718 + }, + "id": "R0YlorBhbeSE", + "outputId": "f42de9bf-d45a-4d2d-c218-fe000d716e54" + }, + "outputs": [], + "source": [ + "Tester.test(improved_model_predict, test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🎉 And there it is — the open-source, quantized, and fine-tuned model outperforms the rest. 🙌 \n", + "\n", + "📘 We'll continue in [the next notebook](https://github.com/lisekarimi/lexo/blob/main/09_part8_summary.ipynb) with a final wrap-up and summary of key insights.\n" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week7/community_contributions/lisekarimi/09_part8_summary.ipynb b/week7/community_contributions/lisekarimi/09_part8_summary.ipynb new file mode 100644 index 0000000..f7983a4 --- /dev/null +++ b/week7/community_contributions/lisekarimi/09_part8_summary.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "attachments": 
{}, + "cell_type": "markdown", + "metadata": { + "id": "GHsssBgWM_l0" + }, + "source": [ + "# 🔍 Predicting Item Prices from Descriptions (Part 8)\n", + "---\n", + "- Data Curation & Preprocessing\n", + "- Model Benchmarking – Traditional ML vs LLMs\n", + "- E5 Embeddings & RAG\n", + "- Fine-Tuning GPT-4o Mini\n", + "- Evaluating LLaMA 3.1 8B Quantized\n", + "- Fine-Tuning LLaMA 3.1 with QLoRA\n", + "- Evaluating Fine-Tuned LLaMA\n", + "- ➡️ Summary & Leaderboard\n", + "\n", + "---\n", + "\n", + "# 🧪 Part 8: Summary & Leaderboard\n", + "\n", + "![](https://github.com/lisekarimi/lexo/blob/main/assets/09_ft_leaderboard.png?raw=true)\n", + "\n", + "# 🥇 The winner is the LLaMA 3.1 8B (4-bit) fine-tuned on 400K samples \n", + "\n", + "LLaMA 3.1 8B (4-bit) fine-tuned on 400K samples is outperforming even the big guy GPT-4o — with the lowest error and highest accuracy (75.6%).\n", + "\n", + "RAG + GPT-4o Mini also did well, proving that retrieval adds real value.\n", + "\n", + "On the other hand, traditional ML models and even human guesses, gave weaker results and fell behind the top models.\n", + "\n", + "💡 As we’ve seen, a **well-tuned open-source small model** can do amazing things on a focused task — sometimes even better than large, closed models.\n", + "It’s not about size — it’s about fit, focus, and fine-tuning.\n", + "\n", + "# ✨ Conclusion\n", + "What a journey! From classic ML to state-of-the-art LLMs, from embeddings to retrieval and fine-tuning — we explored it all to answer: who predicts prices best?\n", + "\n", + "Thanks for following along — see you in the next challenge! 
🚀\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/10_part1_ensemble_model.ipynb b/week8/community_contributions/lisekarimi/10_part1_ensemble_model.ipynb new file mode 100644 index 0000000..5635a9f --- /dev/null +++ b/week8/community_contributions/lisekarimi/10_part1_ensemble_model.ipynb @@ -0,0 +1,1126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3ede0360-00f4-404e-b0d2-4a83cc385654", + "metadata": { + "id": "3ede0360-00f4-404e-b0d2-4a83cc385654" + }, + "source": [ + "🔗 Ensemble Model\n", + "---\n", + "We’ll reuse core components built earlier:\n", + "\n", + "- A fine-tuned LLaMA model\n", + "- An XGBoost regression model, stored in Hugging Face\n", + "- A ChromaDB vector store, stored on Google Drive and also available on AWS S3\n", + "- A GPT-4o mini + RAG pipeline\n", + "\n", + "We'll run all three models on the same test data, gather their predictions, and train a Linear Regression Ensemble. 
The ensemble learns how to combine these predictions to output a more accurate final price.\n", + "\n", + "Once trained, we'll save the ensemble as ensemble_model.pkl, ready for later use.\n", + "\n", + "- 🧑‍💻 Skill Level: Advanced\n", + "- ⚙️ Hardware: ⚠️ GPU required (use Google Colab)\n", + "- 🛠️ Requirements: \n", + "\n", + " - 🔑 Hugging Face Token and OpenAI Key — must be set in Google Colab secrets or .env files if you are running with your own GPU\n", + " - completion of Part 9 of [this series of notebooks](https://github.com/lisekarimi/lexo)\n", + "- 🎯 Task: Train and save the Ensemble Model\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "mzYB4XYQeWRQ", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mzYB4XYQeWRQ", + "outputId": "f474ce9b-09fb-4a47-93d7-273fe2d2ba10" + }, + "outputs": [], + "source": [ + "# Install required packages in Google Colab\n", + "%pip install -q tqdm huggingface_hub numpy sentence-transformers datasets chromadb xgboost peft torch bitsandbytes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3caecd1-8712-4acd-80b5-e8059c16f43f", + "metadata": { + "id": "b3caecd1-8712-4acd-80b5-e8059c16f43f" + }, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import re\n", + "import zipfile\n", + "import chromadb\n", + "import joblib\n", + "import numpy as np\n", + "import pandas as pd\n", + "import requests\n", + "import torch\n", + "from datasets import load_dataset\n", + "from google.colab import userdata\n", + "from huggingface_hub import HfApi, hf_hub_download, login\n", + "from openai import OpenAI\n", + "from peft import PeftModel\n", + "from sentence_transformers import SentenceTransformer\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import r2_score, 
mean_squared_error\n", + "from sklearn.metrics import r2_score\n", + "from tqdm import tqdm\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05d9523f-b6c9-4132-bd2b-6712772b3cd2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "05d9523f-b6c9-4132-bd2b-6712772b3cd2", + "outputId": "7077320e-43e2-4b03-ca7d-e7ea9a3407f8" + }, + "outputs": [], + "source": [ + "# Mount Google Drive to access saved ChromaDB and XGBoost model files\n", + "\n", + "from google.colab import drive\n", + "drive.mount(\"/content/drive\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "z9735RD_TUHw", + "metadata": { + "id": "z9735RD_TUHw" + }, + "outputs": [], + "source": [ + "# Load from Colab's secure storage\n", + "\n", + "openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n", + "openai = OpenAI(api_key=openai_api_key)\n", + "\n", + "hf_token = userdata.get(\"HF_TOKEN\")\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "DtswsfBQxxJF", + "metadata": { + "id": "DtswsfBQxxJF" + }, + "outputs": [], + "source": [ + "# Configuration\n", + "\n", + "HF_USER = \"lisekarimi\"\n", + "ROOT = \"/content/drive/MyDrive/snapr\"\n", + "os.makedirs(ROOT, exist_ok=True)\n", + "\n", + "api = HfApi(token=hf_token)\n", + "REPO_NAME = \"smart-deal-finder-models\"\n", + "REPO_ID = f\"{HF_USER}/{REPO_NAME}\"" + ] + }, + { + "cell_type": "markdown", + "id": "qByarIFiTYa1", + "metadata": { + "id": "qByarIFiTYa1" + }, + "source": [ + "### 📥 Load Test Dataset" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9ca3e34", + "metadata": {}, + "outputs": [], + "source": [ + "# #If you face NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported run:\n", + "# %pip install -U datasets" + ] + }, + { + "cell_type": "code", 
# ChromaDB

CHROMA_PATH = f"{ROOT}/chroma"
COLLECTION_NAME = "price_items"
CHROMA_ZIP_URL = "https://aiprojects-lise-karimi.s3.eu-west-3.amazonaws.com/smart-deal-finder/chroma.zip"
CHROMA_ZIP_TMP = "/tmp/chroma.zip"

# Download and unzip if CHROMA_PATH doesn't exist.
# CHROMA_PATH must only appear once the archive has been fully downloaded and
# extracted: this existence check is the only guard, so a directory left behind
# by a failed attempt would make every later run skip the download and open an
# empty store.
if not os.path.exists(CHROMA_PATH):
    # Stream to disk instead of buffering the whole archive in memory, and
    # fail loudly on HTTP errors rather than saving an error page as a zip.
    with requests.get(CHROMA_ZIP_URL, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(CHROMA_ZIP_TMP, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)

    # Extract next to the target, then move into place in one step.
    partial_path = f"{CHROMA_PATH}.partial"
    with zipfile.ZipFile(CHROMA_ZIP_TMP, "r") as zip_ref:
        zip_ref.extractall(partial_path)
    os.replace(partial_path, CHROMA_PATH)

client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = client.get_or_create_collection(name=COLLECTION_NAME)
"execution_count": null, + "id": "8fi1BS71XCv1", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 337, + "referenced_widgets": [ + "c60f1153084a493ea31fac10bf986aef", + "6de41ac188dd48aea5d30a90bc52c38c", + "d2b4cdcaef6a4c41972f8c96af2814ab", + "2180dfb4e6e74df5bf9985c481b6e420", + "dcff84c8c3bf4f4bae334e0484207d10", + "ca1ed709ecaa4a0e8ac96ffb930e6613", + "e5297e7d36334c57aece043f62c79841", + "5fdca4e0987a4788983c418941711d7e", + "200e8e9b0df84affa177567243bf18d1", + "ef6e6dcff8b444bba62c1f76e1127d7c", + "7c94357c0d4e444489e8d47d2151437b", + "4ffdcd2ec96046ffb5121def27c95c9d", + "dd47ad1efe46496cb096a7714cf27c19", + "194ad4f8707b4d288e88cdbdfa33605d", + "c8a05ae3f5854f24998cc615a8849c88", + "b5ec411e72a946f8b4a470de5827c949", + "1a0be25b030d43858cee804da65d67a1", + "88d1f6a56f9b4a50854aee82c0945cc9", + "73e5967ae96942e080f3b05638583bc8", + "8434bfa06abf42c98e8ffb0e7b83c9f9", + "43d872e632da4d9883ea3d71dc91bdf9", + "86729f54df1b4967b2730b48f84a98aa", + "83fcebcf2b2c4213835334a998ba91e9", + "4ef06b10bfcb418d85534a8b73688eff", + "88c466cc89234d8f9f21147882fc5faf", + "f87e958c639544c0b925646fc28c4604", + "a52988b97dff4759a456398ecea1eaaf", + "c1116a13be86401bbbf6e51de0df7d12", + "d6ed27ce322748d29ed864808f619ee3", + "4a6fbedd3333496081695800cae8bdda", + "a7badc083fb34e69bd6f27bc9a805e7b", + "a78bcdbac2f74c72938d87c431f23e78", + "1d627cf1043642a3815a2902f65b4ded", + "3b8cc480ded24f66b03779fd25844670", + "0ce0073368c64339b3c1f960861e4b56", + "ffc973a4347943ebaa4ead16e04c05f6", + "aafba411ee984946a3ec0760580b60b6", + "0dd2501d917f48739b2817d598541660", + "213ca3afc47945a68e28a6ae005c3b7d", + "6355e004d7c34b969b2d2c6ccbc12620", + "9359b873cb4a4187b67a1732d78c7534", + "32f86a2b9e0547a6bc0a523ca3cfa088", + "0f446cb8ee3147438ef1e98e665a2831", + "64bb7ebae66d42f2a4d6a3039bf67d4b", + "e2b51ee511234ff2bc2cf33227fe2088", + "a76d3def06db41fab4ab2f077839d5bc", + "fa9598b858c14024aaf15d1417e9683d", + "df0bb9a9635643ebb679e115f45dde8e", + 
"527c4d1987334e3e9b2aa0de7d0527a7", + "0c6a889a9066484abbcb87b730d7e325", + "80a1f4c902154f2184c38ef844a1cca7", + "463c3cc65cd343108fe6049e4cde7142", + "fb015ce2fd9b48d79db67f80181964b7", + "07f46375dc594cd19ac5ab983083b2de", + "451ca5f213544cc8b24de6b7d55602fc", + "5bb2e645ea7741839e0f88ae484d94d2", + "19d1353070f643e08364500e9b1c30e6", + "fff94d7934cc4793876903d1c18efbfa", + "f0413b6310ef4510bf493e6814fa162d", + "a97c1662e64c44ee9f6e5be5617c07ec", + "da02a5ab5fe44cc297ab3048509a99e1", + "ee8c23aa2ca84b32a02a2500917559d8", + "6635ef559f72485e9453f87b3921f954", + "d9f2925a563d4d9fa332c15205f44d9b", + "73ec7891d53149e7a072a0e310716178", + "f7309076b36e4224acc42ade5d09bf37", + "cd53294ff44e4955afbfbd4660563b58", + "2ae42eb6385f43fab59f2bb56bb8a28e", + "fc0f1abaaf054d0d93a27c7ee0f6630d", + "c7a61078596c475784307480d26e3661", + "5bd26e4ff28f4639b52aab848ada03d1", + "ce710ef5ffa14cfe9842c63caebc81e5", + "63061726d47940c395a00d5d01556f4f", + "2b41598231d14f3ca6354c9543ec4351", + "0df33079f63c45d39de21439289aa4a8", + "8cc5d2eac9a64d68b72608bb5ae44c89", + "dd33a409204c4610a08e44c3e82e00da", + "b96b7ac71a6c48a9a6c888f2f34efea5", + "794d71dd5b734a3cb5607fc31aaddd18", + "0f6e7a2d9b8846178a7492e137d83bba", + "657bb839f0ea40eb9873385cecd06fd0", + "1ac174e8904943bb9a5e5483e58eef63", + "a1c9714fc4ea48af83669481e89c58c7", + "763a2d64c8e94ca1b0289264d9f868bc", + "ef78cf15ab914b3fa95ff95a86ec7a99", + "24350ebdc38a41e689f3e3b09dfc3e35", + "b8131b3c4c4c4b20809af9b0e91dd006", + "420c50ed8abe49ec9f4f2777e6cd2749", + "fc3f2d2c33ee40f8850710c2f4ce331f", + "90730ec699e84ddda2af799f8220e7a5", + "01f1c4b3b434474dbf2212a05869354d", + "912d6c1687324bb9b334bbf98a2b5b30", + "dc4106a0020b4b9fa21cd12a44967f2e", + "0084e537ffa74ab4a6d5f307b0916d2c", + "0edeb9ca771c4ca2a9a678e0e8a91614", + "b610d515ddb4405695e6972e45463194", + "4af72cb05f284d42bca73fcb88904255", + "86f34fb6325e4e878eee0be27946c88b", + "bfc26f456f1d440bb80eedac1cb14967", + "1f005c3cf7594275a37bb937a3c33db3", + 
"4347e7d3db4d4cc3836a4e69db032f27", + "809a4d0270dd4c05817ea224bb78ff5a", + "b403a344e84342e4b076e64e829d7354", + "6b3cbb0ac0b14e3fa193cd5cb3f8f521", + "2d06aaf8d15b456b8fadeb54dd2ea73d", + "74842e94dda648d18cd055220a3d2b39", + "5a3818bde07841fbb6077bf20b7dec4b", + "2a5224c8b3004d249a07297a2111493f", + "ff02ac7f08974426b3f70b71e59ed5bb", + "0dd550d3e39f42809fa16770231af7e5" + ] + }, + "id": "8fi1BS71XCv1", + "outputId": "9256b509-1371-4bb3-bd84-98bb75725ac3" + }, + "outputs": [], + "source": [ + "# Embedding Model\n", + "\n", + "embedding_model = SentenceTransformer(\"intfloat/e5-small-v2\", device=\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "zmwIbufXUzMo", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 483, + "referenced_widgets": [ + "36b26207185e4e10a8c60f0f5918aa7c", + "96a0c5044c824968a701e20319c8d037", + "a567759a6e554c2eb9334559e880a56e", + "dcc68ce908ac4cfc9ffdcd0333cce14b", + "06d7ac4abde74e1496ef80b9e22cd148", + "94ba3ecd90f84291a46462a51ba001b8", + "00b94b9d1e4e4e428c7467b4f99e45af", + "02219374302849bd93d7aba7f65ee42b", + "82223db2023045e88dc3d9652bc6b183", + "6e08a796d2c440c58d5e6bb20f39be16", + "6ac04725da4f485f8d7efd738de0940a", + "230a6add3adb4dbd9f0329412e3f1455", + "db5f777e8ec143fe89a2e6296031c9c2", + "4f87c3aba240471d96349a037777732e", + "74b04b71ce0746a89d146c4044c85890", + "582e3cd91e3048639ab48492b5ca4b15", + "1d8e31a4418c49e0bfa28b399673f718", + "ec17f9e6bbbc4584baf11c0e7c504f84", + "19779fea3c8c4a5cbea953b059775110", + "3c82450a6db649f59f36668d5521d203", + "3447d41776ef406bbfbe3e6c277b82d2", + "4195859f8457499ba8e61a9f1662931c", + "6f80cc0bed9b42e89f9691675ff5a484", + "dc39eee63325479a915101649ab04273", + "99e1e2c842f845d1b2ded34736b60ad7", + "20ff78d4df09401d8aee462b57c57a48", + "1aff04abbd0b4f15bd58154b00591264", + "9cf024b488154926861d695137268da1", + "6470d6b03e3548d783ed252d128ec361", + "230a79f3d05d4c9eac73ae6962cb5d2d", + "adfe1cc7a6f94b0ab43170c75688374d", + 
"b6d24621c29f4352862f37ae69f2d6ad", + "303a6f79669e40d98ed2998f4f5e47cc", + "b737799377c948a99edf34c014a105c4", + "daf9616a687f406da1d9ee2bd147c850", + "cfd34b617326486498a916531bae9a87", + "201ea14e31a244a3aa2aeed2c12fb255", + "8454d5f263eb45daa0e6f7db6aa3f92d", + "f9787ee50d634421ae5f0325126dcb73", + "c2200daaf2994ed9ae16587d8d52236c", + "93a0ec9723074e9199d1f9db988c30dc", + "b1cb687d58ba40f49961d5485a466ef7", + "ccc580852d66401993d675c254832379", + "7321b89aea1c4746901ef40548bcf056", + "8fae24599e174689adbaa52a16270785", + "01605d6fb53d4e56aca9a746b2c75566", + "45cb352704fe41f1be0f01c61511323d", + "881550789f4845dda8561f6b26aff204", + "654bf64ec165449993b195209d75f4ba", + "9eaa6e09335047d5987c0a6528d5e77e", + "39331a837a644795bada1e2b034fe14d", + "a339562553394755811bb7077a81843d", + "4f58f5fa385f44e0ae09e2019294d597", + "431cc587951845d6af39f3e4ab0f2f76", + "ac879c2c923b423dabd6d0d60b12266d", + "4a3eb0fc1d2d4606a8acb382085a57af", + "f82bbcbd14ad48a78ebdfdfb43916bd1", + "a634449526034dbdb945c4905f4edeba", + "da4133a915ad4d449876a43468203842", + "a56f3a61d1ba4011ab6fce4067fa8418", + "46727d5afdf7487fb073e7e2d25cc75d", + "1480c5a1a0ad4151a12d47bc22685f04", + "5720fc31c90344908d9eeb49fe83df1b", + "47bd422d424a47e48edd304773162082", + "6a68ecb89be34255a0e0fc6db41c1f4d", + "12b1b3c7f0914030ad756b676cc97962", + "780f5b6ad91142f991a936b55219f61d", + "45723f91352b49688469a95e7f47aa9d", + "90bbf502500340a1993a957c27ad3d33", + "dcbb25b2a082476d905bcf124a849322", + "38e6bd6d64b54f9a8cdb4f40eaf41cde", + "7544a101cad94a15a2f4eb5639d22525", + "501184a0fc424a02a80aecd3f62fb9fd", + "1cbf5d28bced46ac8712a4609b5a5867", + "9ba6ecc0a422472681d8e61bdb32f87b", + "8383700148dd44538ed81ec5a261b7b5", + "740e930ff17c4f668818a8c762a5470e", + "0d995d8da0464c9ca7b1b444c22de025", + "bc8a5e6d27ba402996434f00918c8b0e", + "75f225b1a6f845148361b029878b63ea", + "c16b051d3cb44607b339770f5f8b6f2e", + "7857ed1e0b0f45bdb48269fbad68653d", + "e65ffc77bd6740c7924aa5b93297cb89", + 
"3ba97a41b4654dc0bb9bcaaa685b4518", + "6504931865a74cb5a80f2ac60da47430", + "ff8d5791b13c440d81312a6b96c9592f", + "c31bd8cb693b4e248a29f2ded032fb70", + "c25b1a42547a422ba7597c99ca4ce249", + "5c6338fcad9344e092f5077bf73c4910", + "dca176a7a6ea4fe9b025f851976f436a", + "611fc076771a4fcca5c46367b711d61a", + "0cfde45e26cf4c05b67755c2274f2df8", + "5d572d2f46ea484587e085c29318b616", + "8ad59c1261844a06b7abecebd7b60377", + "82d610cd077c47bd9efd609f2399c861", + "07fd58c9d07144a7a0aacab6b8252125", + "aa0932b4e66b4f33ba9f5237ea1470da", + "0d867615a23a42988bb91b4f0d0cc942", + "9c28ff7b0f5c421390ac1ccb899f093f", + "0645e7ed6593410eaf9c9c0b25158667", + "ca68b1dc60a343f9bd7298a63cadd556", + "9597ec6b495c4298b87967ed3e4044db", + "d7dad0ae58814124af1e92a078122736", + "173575b8b5254537937206759d6b6262", + "d1efdc10d36441d88cf7705e846bfbef", + "3550d450f95f40eeae0c0d559ae9f4de", + "773df79ed7b44f698cce98ca9ed802cc", + "24a2b4b88e1d46488eabb9101536beac", + "0d61c01e6cdf445b9474f9d759676edd", + "9b3505aacd164a19b45aba89eee46378", + "bd6fb8b066be46aaa7d457bf89257e54", + "f1eca4e5d600407885264d340b4f47a7", + "ef1e6a69995845e09781f76a38fced30", + "c5f50067867a40b99cb9f312e8adc49f", + "4e9cf63dbef041aba2c7f0b9c74466c8", + "07baa025b3a14bf89d6f6b438b695bbd", + "fb8609ef5b8d4653b25e52f853b7be1f", + "2f90ac58752347319d1203b5e8765c0e", + "d9815cabf324472a8eab585afabfa47e", + "120687e04065424595571941d816a134", + "a6d09159931f4d3d91a0647d9fa9d8ba", + "833c755f7bf9479abeef0041a82a92ba", + "56d848676e644c739e28730af99d69c6", + "39654eca8add4c09815cb3e6a45616ec", + "02aa70e064744a29af0a68aaca33c741", + "7fd83f95cfef4b1dbd881be7083d7455", + "cdf56625053b417fad2e64a0bed6725b", + "4199341c09bd46fab8a3b649d0c8af7a", + "334e67f38b8243aa9072f52a32e46080", + "492cdb40ffbf444d8e256875663fc598", + "655c6e0f21ff4e9db7f35522355d847c", + "926c1be6e26f4d9eba332881f975ed38", + "47641f7363be4252b9f5e53846bee057", + "887f8a2b268541eab71804a44ba1479b", + "5b6e78d1727e473ab3b66d6ff042aeca", + 
"acc60a1210104049983341db3010be0a", + "c028c37980e14b3ea07b1da6f558651e", + "9d1085906e3548078e5e393a86337c3e", + "259c86a51f4e4cab9648cc603fc25c7e", + "8ce05076e77643a88b062687e2b24493", + "8b3b7f947f4d4401bbca47d5720f7450", + "9bf9dccee248425da698dbb4526fcad9", + "b991477124184bf3b4397762649a6596", + "fb139bcae29f49778bf172eb503c0668", + "93ba52daa9aa4dee8da91bba6c7d0269", + "fefedf36efc94ed287bdeceae698d5b5", + "8288b87b06b34ba4b2c7a343d6cea827", + "5a85d212a6b5468fbc10e6aeb0ad8bee", + "22274b08e14c4c77a6223131779f6f48", + "edc0e436da954a33bbea8e80629eb43c", + "1d9b1c594680467f9c8a6682d8aeb2e7", + "d82f940e0a8a478e8b1ee8f169f798fc", + "ab1616f507594b27b898ede4504b4e39", + "1fed11f1c7484251a2a7400627ad5f6a" + ] + }, + "id": "zmwIbufXUzMo", + "outputId": "2acb6897-4c41-4447-e029-ffcc1b3b4da1" + }, + "outputs": [], + "source": [ + "# Fine Tuned Llama Model\n", + "\n", + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "FINETUNED_MODEL = \"ed-donner/pricer-2024-09-13_13.04.39\"\n", + "REVISION = \"e8d637df551603dc86cd7a1598a8f44af4d7ae36\"\n", + "\n", + "# Quantization config (4-bit)\n", + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + ")\n", + "\n", + "# Load tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "# Load base model\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL, quantization_config=quant_config, device_map=\"auto\"\n", + ")\n", + "\n", + "# Load fine-tuned model\n", + "fine_tuned_model = PeftModel.from_pretrained(\n", + " base_model, FINETUNED_MODEL, revision=REVISION\n", + ")\n", + "\n", + "# Align generation config\n", + "fine_tuned_model.generation_config.pad_token_id = tokenizer.pad_token_id\n", + "\n", + "print(f\"Memory 
footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0IHiJNU7a4XC", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "0264a3987fbf4040860ffa3fc47940d8", + "06d1db35940b469797c39c653741ea36", + "84c4d2fdaf734a559ee3eee09f1be295", + "fddde0bfed544b18ba39bfaa40eb9e1b", + "d40cc525cc28416cad4a45b3631798c9", + "e1372af176154902b1f555f30c28c007", + "5a1352c5ceb84320b14353b7aa21650d", + "522d0ed9e705457e9c72d276e2a26dbd", + "4de73aa76f044811990c379737a8e5c0", + "9305e96697ab4854ac89a6636991101d", + "b00e41d1051340fd904ba719111a907d" + ] + }, + "id": "0IHiJNU7a4XC", + "outputId": "c68bc44e-6b15-46c3-c8d9-3f256f368317" + }, + "outputs": [], + "source": [ + "# XGBoost Trained Model\n", + "\n", + "MODEL_FILENAME = \"xgboost_model.pkl\"\n", + "model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME, token=hf_token)\n", + "xgb_model = joblib.load(model_path)" + ] + }, + { + "cell_type": "markdown", + "id": "76BhcPjWa6C5", + "metadata": { + "id": "76BhcPjWa6C5" + }, + "source": [ + "### 📊 Model prediction collection" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "LgGmUKJxayZ6", + "metadata": { + "id": "LgGmUKJxayZ6" + }, + "outputs": [], + "source": [ + "def extract_tagged_price(output: str):\n", + " \"\"\"Extracts a float price from a string based on 'Price is $' keyword.\"\"\"\n", + " try:\n", + " contents = output.split(\"Price is $\")[1].replace(\",\", \"\")\n", + " match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", contents)\n", + " return float(match.group()) if match else 0.0\n", + " except Exception:\n", + " return 0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ggKf1nSQbAnv", + "metadata": { + "id": "ggKf1nSQbAnv" + }, + "outputs": [], + "source": [ + "def ft_llama_price(description: str):\n", + " prompt = (\n", + " f\"How much does this 
def gpt4o_price(item):
    """Estimate an item's price with gpt-4o-mini, using similar products as context.

    The item's description is embedded with `embedding_model`, 5 results are
    fetched from `collection` for that embedding, and their documents and
    `price` metadata are added to the user prompt sent to the chat model.

    Args:
        item: A dataset record accepted by the notebook's `description()` helper.

    Returns:
        The first number found in the model's reply as a float, or 0.0 when
        the reply is empty or contains no number.
    """

    def get_embedding(text):
        return embedding_model.encode([text], normalize_embeddings=True)

    def find_similars(text):
        results = collection.query(
            query_embeddings=get_embedding(text).astype(float).tolist(), n_results=5
        )
        docs = results["documents"][0]
        prices = [m["price"] for m in results["metadatas"][0]]
        return docs, prices

    def format_context(similars, prices):
        header = (
            "To provide some context, here are similar products and their prices:\n\n"
        )
        entries = [
            f"Product:\n{sim}\nPrice is ${price:.2f}\n\n"
            for sim, price in zip(similars, prices)
        ]
        return header + "".join(entries)

    def build_messages(product_text, similars, prices):
        system_message = (
            "You are a pricing expert. "
            "Given a product description and a few similar products with their prices, "
            "estimate the most likely price. "
            "Respond ONLY with a number, no words."
        )
        context = format_context(similars, prices)
        user_prompt = (
            "Estimate the price for the following product:\n\n"
            + product_text
            + "\n\n"
            + context
        )
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"},
        ]

    # Build the description once and reuse it for retrieval and for the prompt.
    text = description(item)
    docs, prices = find_similars(text)
    messages = build_messages(text, docs, prices)
    response = openai.chat.completions.create(
        model="gpt-4o-mini", messages=messages, seed=42, max_tokens=5
    )
    # The content can be None or contain no digits; the previous
    # `re.search(...).group() or 0` raised AttributeError in that case because
    # the fallback was applied after `.group()` instead of to the match itself.
    reply = response.choices[0].message.content or ""
    match = re.search(r"[-+]?\d*\.\d+|\d+", reply.replace("$", "").replace(",", ""))
    return float(match.group()) if match else 0.0
"https://localhost:8080/" + }, + "id": "XN7P5fkkXfgP", + "outputId": "69f9d265-a402-48ab-a91e-8c6032ea4118" + }, + "outputs": [], + "source": [ + "# Process subset of TRAINING data\n", + "ft_llama_preds_train = []\n", + "gpt4omini_preds_train = []\n", + "xgboost_preds_train = []\n", + "true_prices_train = []\n", + "\n", + "for i in tqdm(train_indices):\n", + " item = test[i]\n", + " text = description(item)\n", + " true_prices_train.append(item[\"price\"])\n", + " ft_llama_preds_train.append(ft_llama_price(text))\n", + " gpt4omini_preds_train.append(gpt4o_price(item))\n", + " xgboost_preds_train.append(xgboost_price(text))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1_6_atEgHnFR", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1_6_atEgHnFR", + "outputId": "956e4dcb-2300-44ab-a66b-9b1254216762" + }, + "outputs": [], + "source": [ + "print(\"True Prices:\", true_prices_train)\n", + "print(\"FT-LLaMA Predictions:\", ft_llama_preds_train)\n", + "print(\"GPT-4o-mini Predictions:\", gpt4omini_preds_train)\n", + "print(\"XGBoost Predictions:\", xgboost_preds_train)" + ] + }, + { + "cell_type": "markdown", + "id": "ygJsuvtLtOdR", + "metadata": { + "id": "ygJsuvtLtOdR" + }, + "source": [ + "Example :\n", + "- True Prices: [245.0, 24.99, 302.4, 737.0, ...]\n", + "- FT-LLaMA Predictions: [99.0, 53.0, 550.0, 852.0, ...]\n", + "- GPT-4o-mini Predictions: [179.99, 97.0, 348.0, 769.0, ...]\n", + "- XGBoost Predictions: [220.19, 59.85, 254.29, 335.76, 165.04, ...]" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tYWMhTrXcA7x", + "metadata": { + "id": "tYWMhTrXcA7x" + }, + "outputs": [], + "source": [ + "# Create features for TRAINING data\n", + "maxes_train = [\n", + " max(a, b, c)\n", + " for a, b, c in zip(ft_llama_preds_train, gpt4omini_preds_train, xgboost_preds_train)\n", + "]\n", + "means_train = [\n", + " np.mean([a, b, c])\n", + " for a, b, c in 
# Fit the ensemble: a LinearRegression mapping the five columns of X_train
# (three model predictions plus their Max and Mean) to the true prices in y_train.
# NOTE(review): nothing visible in this cell draws random numbers, so the seed
# below looks redundant — confirm before removing.
np.random.seed(42)
lr = LinearRegression()
lr.fit(X_train, y_train)

# Print feature coefficients
# NOTE(review): "Mean" is computed as np.mean of the three prediction columns,
# i.e. an exact linear combination of them, so individual coefficients may not
# be uniquely determined — interpret them with care.
feature_columns = X_train.columns.tolist()
for feature, coef in zip(feature_columns, lr.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept={lr.intercept_:.2f}")
# Process subset of TEST data
# Same collection loop as for the training subset, run over test_indices:
# one true price and one prediction per base model for each item.
ft_llama_preds_test = []
gpt4omini_preds_test = []
xgboost_preds_test = []
true_prices_test = []

print("Processing TEST data (50 items)...")
for i in tqdm(test_indices):
    item = test[i]
    text = description(item)
    true_prices_test.append(item["price"])
    ft_llama_preds_test.append(ft_llama_price(text))
    gpt4omini_preds_test.append(gpt4o_price(item))
    xgboost_preds_test.append(xgboost_price(text))

# Create features for TEST data
# Row-wise maximum and mean of the three base predictions.
maxes_test = [
    max(a, b, c)
    for a, b, c in zip(ft_llama_preds_test, gpt4omini_preds_test, xgboost_preds_test)
]
means_test = [
    np.mean([a, b, c])
    for a, b, c in zip(ft_llama_preds_test, gpt4omini_preds_test, xgboost_preds_test)
]

# Create TEST dataframe
# Column names and order match the training frame the ensemble was fitted on.
X_test = pd.DataFrame(
    {
        "FT_LLaMA": ft_llama_preds_test,
        "GPT4oMini": gpt4omini_preds_test,
        "XGBoost": xgboost_preds_test,
        "Max": maxes_test,
        "Mean": means_test,
    }
)

y_test = pd.Series(true_prices_test)
# Evaluate on the test set
# Scores the fitted ensemble `lr` on the feature frame X_test against y_test.
print("Evaluating model...")
y_pred = lr.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"R² score: {r2:.4f}")

# Calculate RMSE
# Square root of the mean squared error, so it is in the same units as the prices.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse:.2f}")

# Calculate MAPE
# Mean of |y_test - y_pred| / y_test, expressed as a percentage.
# The division is by the true price, so a true price of 0 would make this undefined.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f"MAPE: {mape:.2f}%")
# Serialize Ensemble model locally for Hugging Face upload

# Note: MODEL_FILENAME was bound to "xgboost_model.pkl" in an earlier cell;
# it is rebound here, so later cells see the ensemble filename.
MODEL_DIR = os.path.join(ROOT, "models")
MODEL_FILENAME = "ensemble_model.pkl"
LOCAL_MODEL = os.path.join(MODEL_DIR, MODEL_FILENAME)

# Make sure the target directory exists, then dump the fitted regression `lr`.
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(lr, LOCAL_MODEL)

# Create the model repo if it doesn't exist
# exist_ok=True makes this call safe to re-run; the repo is created as private.
api.create_repo(repo_id=REPO_ID, repo_type="model", private=True, exist_ok=True)

# Upload the saved model
# The file keeps the same name in the repo as it has locally.
api.upload_file(
    path_or_fileobj=LOCAL_MODEL,
    path_in_repo=MODEL_FILENAME,
    repo_id=REPO_ID,
    repo_type="model",
)
"44c6af6b-6fc3-44d5-a586-71618af7d09a", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "# Modal (Part 2)\n", + "\n", + "---\n", + "✅ With all models and ChromaDB set up, it's time to integrate everything into a real system: **Snapr** — an app that scans online product listings, predicts their value, and alerts users to great deals.\n", + "\n", + "To power SSnapr, we’ll need:\n", + "- Price prediction models — ready for production \n", + "- Fast, on-demand predictions \n", + "- A scalable setup that handles real-world usage\n", + "\n", + "🔧 That’s where **Modal** comes in. Modal lets us deploy models and services to the cloud, with minimal setup, low latency, and clean Python APIs.\n", + "\n", + "- You can check out a [live demo](https://huggingface.co/spaces/lisekarimi/snapr) of the project\n", + "- The source code is available on [GitHub](https://github.com/lisekarimi/snapr)\n", + "\n", + "---\n", + "📢 Find more LLM notebooks on my [GitHub repository](https://github.com/lisekarimi/lexo)\n" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "b8c175e7-ca0a-4664-bded-08ec131c5636", + "metadata": {}, + "source": [ + "## 📚 Pre-requisites\n", + "\n", + "To follow this project smoothly, it's helpful to know:\n", + "\n", + "- 🛰️ What an API is: You send a request → it’s processed remotely → you receive a result\n", + "- 🐳 What a Docker image & container are:\n", + " - Image = environment with code & dependencies\n", + " - Container = running instance of that image\n", + "- 🧑‍💻 Local vs Remote code execution:\n", + " - Local code runs on your machine\n", + " - Remote code runs in the cloud (via Modal" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "440fffc2-9ec1-433d-9b71-e6fae3b46415", + "metadata": {}, + "source": [ + "## 🔧 Install & Setup Modal\n", + "- Before starting, install Modal in your environment (Run this once): `uv pip install modal`\n", + "- Create an account at modal.com (they give you $5 free to 
# Minimal Modal round-trip: call the same function `f` once in this process
# and once through Modal, inside a temporary app run.
from modal_services.get_started import app, f

with app.run():  # This spins up a container in Modal
    print(f.local(1000))  # Run locally inside the notebook
    print('*' * 5)  # Visual separator between the local and the remote result
    print(f.remote(1000))  # Run remotely via Modal API inside a container
# Look up the *deployed* function by name rather than importing the local module.
# The previous `from modal_services.get_started import f` was dead code: `f` was
# rebound on the very next line, and the import tied this cell to the local
# source even though the deployed function is meant to be callable from anywhere.
f = modal.Function.from_name("example-hello-world", "f")  # (app_name, function_name)
print(f.remote(20))
Full code: `modal_services/ft_pricer.py`
# Fetch the `Pricer` class from the deployed "llm-ft-pricer" app, instantiate
# it, and run its `price` method remotely on one product description.
Pricer = modal.Cls.from_name("llm-ft-pricer", "Pricer")
pricer = Pricer()
reply = pricer.price.remote("SEVERIN 28L Microwave, 900W, 5 power levels, 35-min timer, turntable (31.5 cm), Silver, MW 7772")
print(reply)
To use it locally, we’ll wrap it in a class called `FTPriceAgent` (full code: `agents/ft_price_agent.py`) that:

- Connects to the remote app via `modal.Cls.from_name(...)`
- Calls `.price.remote(...)` to run predictions

🔄 **Two API calls happen:**
1. `modal.Cls.from_name(...)` → fetches the deployed class
2. `.price.remote(...)` → runs the remote method on Modal
" + ], + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/agents/__init__.py b/week8/community_contributions/lisekarimi/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/week8/community_contributions/lisekarimi/agents/base_agent.py b/week8/community_contributions/lisekarimi/agents/base_agent.py new file mode 100644 index 0000000..fe09e18 --- /dev/null +++ b/week8/community_contributions/lisekarimi/agents/base_agent.py @@ -0,0 +1,33 @@ +import logging + +class Agent: + """ + An abstract superclass for Agents + Used to log messages in a way that can identify each Agent + """ + + # Foreground colors + RED = '\033[31m' + GREEN = '\033[32m' + YELLOW = '\033[33m' + BLUE = '\033[34m' + MAGENTA = '\033[35m' + CYAN = '\033[36m' + WHITE = '\033[37m' + + # Background color + BG_BLACK = '\033[40m' + + # Reset code to return to default color + RESET = '\033[0m' + + name: str = "" + color: str = '\033[37m' + + def log(self, message): + """ + Log this as an info message, identifying the agent + """ + color_code = self.BG_BLACK + self.color + message = f"[{self.name}] {message}" + logging.info(color_code + message + self.RESET) \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/agents/ft_price_agent.py b/week8/community_contributions/lisekarimi/agents/ft_price_agent.py new file mode 100644 index 0000000..465f1bb --- /dev/null +++ b/week8/community_contributions/lisekarimi/agents/ft_price_agent.py @@ -0,0 +1,29 @@ +import modal +from agents.base_agent import Agent + 
class FTPriceAgent(Agent):
    """
    Agent wrapper around the fine-tuned LLM pricer that is deployed on Modal.
    """

    name = "FTPrice Agent"
    color = Agent.RED

    def __init__(self):
        """
        Look up the deployed `Pricer` class on Modal and keep an instance of it.
        """
        self.log("FTPrice Agent is initializing - connecting to modal")
        # 1st API call: resolve the remote Pricer class from the "llm-ft-pricer" app
        remote_pricer_cls = modal.Cls.from_name("llm-ft-pricer", "Pricer")
        self.pricer = remote_pricer_cls()
        self.log("FTPrice Agent is ready")

    def price(self, description: str) -> float:
        """
        Ask the remote model for a price estimate of the described item.
        """
        self.log("FTPrice Agent is calling remote fine-tuned model")
        # 2nd API call: run Pricer.price remotely with this description
        estimate = self.pricer.price.remote(description)
        self.log(f"FTPrice Agent completed - predicting ${estimate:.2f}")
        return estimate
+ +MIN_CHARS = 300 # Reject items with less than 300 characters +CEILING_CHARS = MAX_TOKENS * 7 # Truncate long text to about 1120 characters (approx 160 tokens) + +class Item: + """ + An Item is a cleaned, curated datapoint of a Product with a Price + """ + + # Load tokenizer for the model + tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) + + # Define PRICE_LABEL and question for the training prompt + PRICE_LABEL = "Price is $" + QUESTION = "How much does this cost to the nearest dollar?" + + # A list of useless phrases to remove to reduce noise for price prediction + REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "] + + # Attributes for each item + title: str + price: float + category: str + token_count: int = 0 # How many tokens in the final prompt + + # Optional fields + details: Optional[str] # The value can be a string or can be None + prompt: Optional[str] = None + include = False # Whether to keep the item or not + + def __init__(self, data, price): + self.title = data['title'] + self.price = price + self.parse(data) + + def scrub_details(self): + """ + Removes useless phrases from details, which often has repeated specs or boilerplate text. 
+ """ + details = self.details + for remove in self.REMOVALS: + details = details.replace(remove, "") + return details + + def scrub(self, stuff): + """ + Clean up the provided text by removing unnecessary characters and whitespace + Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers + """ + stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip() + stuff = stuff.replace(" ,", ",").replace(",,,",",").replace(",,",",") + words = stuff.split(' ') + select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)] + return " ".join(select) + + def parse(self, data): + """ + Prepares the text, checks length, tokenizes it, and sets include = True if it’s valid. + """ + # Builds a full contents string by combining description, features, and cleaned details. + contents = '\n'.join(data['description']) + if contents: + contents += '\n' + features = '\n'.join(data['features']) + if features: + contents += features + '\n' + self.details = data['details'] + if self.details: + contents += self.scrub_details() + '\n' + + # If content is long enough, trim it to max char limit before processing. + if len(contents) > MIN_CHARS: + contents = contents[:CEILING_CHARS] + + # Clean and tokenize text, then check token count. + text = f"{self.scrub(self.title)}\n{self.scrub(contents)}" + tokens = self.tokenizer.encode(text, add_special_tokens=False) + + if len(tokens) > MIN_TOKENS: + # Truncate tokens, decode them back and create the training prompt + tokens = tokens[:MAX_TOKENS] + text = self.tokenizer.decode(tokens) + self.make_prompt(text) + + # Mark the item as valid and ready to be used in training + self.include = True # Only items with MIN_TOKENS <= tokens <= MAX_TOKENS are kept + + + def make_prompt(self, text): + """ + Builds the training prompt using the question, text, and price. Then counts the tokens. 
+ """ + self.prompt = f"{self.QUESTION}\n\n{text}\n\n" + self.prompt += f"{self.PRICE_LABEL }{str(round(self.price))}.00" + self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False)) + + def test_prompt(self): + """ + Returns the prompt without the actual price, useful for testing/inference. + """ + return self.prompt.split(self.PRICE_LABEL )[0] + self.PRICE_LABEL + + def __repr__(self): + """ + Defines how the Item object looks when printed — it shows the title and price. + """ + return f"<{self.title} = ${self.price}>" + + + + + \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/helpers/loaders.py b/week8/community_contributions/lisekarimi/helpers/loaders.py new file mode 100644 index 0000000..4314c65 --- /dev/null +++ b/week8/community_contributions/lisekarimi/helpers/loaders.py @@ -0,0 +1,106 @@ +from datetime import datetime # Measure how long loading takes +from tqdm import tqdm # Shows a progress bar while processing data +from datasets import load_dataset # Load a dataset from Hugging Face Hub +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor # For parallel processing (speed) +from items import Item + +CHUNK_SIZE = 1000 # Process the dataset in chunks of 1000 datapoints at a time (for efficiency) +MIN_PRICE = 0.5 +MAX_PRICE = 999.49 +WORKER = 4 # Set the number of workers here + +class ItemLoader: + + def __init__(self, name): + """ + Initialize the loader with a dataset name. + """ + self.name = name # Store the category name + self.dataset = None #Placeholder for the dataset (we load it later in load()) + + def process_chunk(self, chunk): + """ + Convert a chunk of datapoints into valid Item objects. 
+ """ + batch = [] # Initialize the list to hold valid items + + # Loop through each datapoint in the chunk + for datapoint in chunk: + try: + # Extract price from datapoint + price_str = datapoint['price'] + if price_str: + price = float(price_str) + + # Check if price is within valid range + if MIN_PRICE <= price <= MAX_PRICE: + item = Item(datapoint, price) + + # Keep only valid items + if item.include: + batch.append(item) + except ValueError: + continue # Skip datapoints with invalid price format + return batch # Return the list of valid items + + + def load_in_parallel(self, workers): + """ + Split the dataset into chunks and process them in parallel. + """ + results = [] + size = len(self.dataset) + chunk_count = (size // CHUNK_SIZE) + 1 + + # Build chunks directly here (no separate function) + chunks = [ + self.dataset.select(range(i, min(i + CHUNK_SIZE, size))) + for i in range(0, size, CHUNK_SIZE) + ] + + # Process chunks in parallel using multiple CPU cores + with ProcessPoolExecutor(max_workers=workers) as pool: + for batch in tqdm(pool.map(self.process_chunk, chunks), total=chunk_count): + results.extend(batch) + + # Add the category name to each result + for result in results: + result.category = self.name + + return results + + + def load(self, workers=WORKER): + """ + Load and process the dataset, returning valid items. 
+ """ + # Record start time + start = datetime.now() + + # Print loading message + print(f"Loading dataset {self.name}", flush=True) + + # Load dataset from Hugging Face (based on category name) + self.dataset = load_dataset( + "McAuley-Lab/Amazon-Reviews-2023", + f"raw_meta_{self.name}", + split="full", + trust_remote_code=True + ) + + # Process the dataset in parallel and collect valid items + results = self.load_in_parallel(workers) + + # Record end time and print summary + finish = datetime.now() + print( + f"Completed {self.name} with {len(results):,} datapoints in {(finish-start).total_seconds()/60:.1f} mins", + flush=True + ) + + # Return the list of valid items + return results + + + + \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/helpers/testing.py b/week8/community_contributions/lisekarimi/helpers/testing.py new file mode 100644 index 0000000..9422182 --- /dev/null +++ b/week8/community_contributions/lisekarimi/helpers/testing.py @@ -0,0 +1,84 @@ +import math +import matplotlib.pyplot as plt + +GREEN = "\033[92m" +YELLOW = "\033[93m" +RED = "\033[91m" +RESET = "\033[0m" +COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN} + +class Tester: + + def __init__(self, predictor, data, title=None, size=250): + self.predictor = predictor + self.data = data + self.title = title or predictor.__name__.replace("_", " ").title() + self.size = size + self.guesses = [] + self.truths = [] + self.errors = [] + self.sles = [] + self.colors = [] + + def color_for(self, error, truth): + if error<40 or error/truth < 0.2: + return "green" + elif error<80 or error/truth < 0.4: + return "orange" + else: + return "red" + + def run_datapoint(self, i): + datapoint = self.data[i] + guess = self.predictor(datapoint) + truth = datapoint["price"] + error = abs(guess - truth) + log_error = math.log(truth+1) - math.log(guess+1) + sle = log_error ** 2 + color = self.color_for(error, truth) + title = datapoint["text"][:40] + "..." 
if len(datapoint["text"]) > 40 else datapoint["text"] + self.guesses.append(guess) + self.truths.append(truth) + self.errors.append(error) + self.sles.append(sle) + self.colors.append(color) + # print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}") + + def chart(self, title): + max_error = max(self.errors) + plt.figure(figsize=(15, 6)) + max_val = max(max(self.truths), max(self.guesses)) + plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6) + plt.scatter(self.truths, self.guesses, s=3, c=self.colors) + plt.xlabel('Ground Truth') + plt.ylabel('Model Estimate') + plt.xlim(0, max_val) + plt.ylim(0, max_val) + plt.title(title) + + # Add color legend + from matplotlib.lines import Line2D + legend_elements = [ + Line2D([0], [0], marker='o', color='w', label='Accurate (green)', markerfacecolor='green', markersize=8), + Line2D([0], [0], marker='o', color='w', label='Medium error (orange)', markerfacecolor='orange', markersize=8), + Line2D([0], [0], marker='o', color='w', label='High error (red)', markerfacecolor='red', markersize=8) + ] + plt.legend(handles=legend_elements, loc='upper left') + plt.show() + + def report(self): + average_error = sum(self.errors) / self.size + rmsle = math.sqrt(sum(self.sles) / self.size) + hits = sum(1 for color in self.colors if color=="green") + title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%" + self.chart(title) + + def run(self): + self.error = 0 + for i in range(self.size): + self.run_datapoint(i) + self.report() + + @classmethod + def test(cls, function, data): + cls(function, data).run() \ No newline at end of file diff --git a/week8/community_contributions/lisekarimi/modal_services/__init__.py b/week8/community_contributions/lisekarimi/modal_services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/week8/community_contributions/lisekarimi/modal_services/ft_pricer.py b/week8/community_contributions/lisekarimi/modal_services/ft_pricer.py new file mode 100644 index 0000000..974aeb8 --- /dev/null +++ b/week8/community_contributions/lisekarimi/modal_services/ft_pricer.py @@ -0,0 +1,140 @@ +import modal +from modal import App, Volume, Image + +import logging +logging.basicConfig(level=logging.INFO) + +# ───────────────────────────────────────────────────────────────────────────── +# Constants +# ───────────────────────────────────────────────────────────────────────────── + +GPU = "T4" # Use a T4 GPU for inference +CACHE_PATH = "/cache" # Mount point for the Modal volume + +# Hugging Face model references +BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B" +FINETUNED_MODEL = "ed-donner/pricer-2024-09-13_13.04.39" +REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36" # Commit of the fine-tuned model + +# Local cache paths (inside the volume) +BASE_MODEL_DIR = f"{CACHE_PATH}/llama_base_model" +FINETUNED_MODEL_DIR = f"{CACHE_PATH}/llama_finetuned_model" + +# ───────────────────────────────────────────────────────────────────────────── +# Structure +# ───────────────────────────────────────────────────────────────────────────── + +# Container (App: llm-ft-pricer) +# ├── /app ← Code + installed Python packages (from image) +# ├── /cache ← Mounted Modal volume (`hf-hub-cache`) +# │ └── meta-llama/Meta-Llama-3.1-8B/... ← HuggingFace model files downloaded via snapshot_download + + + +QUESTION = "How much does this cost to the nearest dollar?" 
+PREFIX = "Price is $" # Used to parse generated output + +# ───────────────────────────────────────────────────────────────────────────── +# Modal App, Image, Volume, Secrets +# ───────────────────────────────────────────────────────────────────────────── + +app = modal.App("llm-ft-pricer") # Define the Modal app + +image = ( + Image.debian_slim() + .pip_install("huggingface", "torch", "transformers", "bitsandbytes", "accelerate", "peft") # All needed libraries + .env({"HF_HUB_CACHE": CACHE_PATH}) # Hugging Face will store model files in /cache +) + +cache_vol = modal.Volume.from_name("hf-hub-cache", create_if_missing=True) # Persisted volume for caching models +secrets = [modal.Secret.from_name("HF_TOKEN")] # Hugging Face auth token + +# ───────────────────────────────────────────────────────────────────────────── +# Modal Class: Pricer +# ───────────────────────────────────────────────────────────────────────────── + +# All methods in this class run inside the container with the image, volume, secrets, and GPU you configured. +@app.cls( + image=image, + secrets=secrets, + volumes={CACHE_PATH: cache_vol}, # Mount volume into /cache + gpu=GPU, + timeout=1800, # 30-minute max runtime + min_containers=0, # = 1 : Keeping one container warm uses credits continuously if you forget to stop it. 
+ scaledown_window=300, # Shuts down the container +) +class Pricer: + @modal.enter() + def setup(self): + import os, torch + import logging + from huggingface_hub import snapshot_download + from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig + from peft import PeftModel + + # Create cache path if it doesn't exist + os.makedirs(CACHE_PATH, exist_ok=True) + + # Download base and fine-tuned models into volume + logging.info("Downloading base model...") + snapshot_download(BASE_MODEL, local_dir=BASE_MODEL_DIR) + + logging.info("Downloading fine-tuned model...") + snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_MODEL_DIR) + + # Quantization config (4-bit) + quant_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_quant_type="nf4" + ) + + # Load tokenizer + self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_DIR) + self.tokenizer.pad_token = self.tokenizer.eos_token + self.tokenizer.padding_side = "right" + + # Load base model (quantized) + base_model = AutoModelForCausalLM.from_pretrained( + BASE_MODEL_DIR, + quantization_config=quant_config, + device_map="auto" + ) + + # Apply fine-tuned weights + self.fine_tuned_model = PeftModel.from_pretrained( + base_model, + FINETUNED_MODEL_DIR, + revision=REVISION + ) + self.fine_tuned_model.generation_config.pad_token_id = self.tokenizer.pad_token_id + + @modal.method() + def price(self, description: str) -> float: + import re, torch + from transformers import set_seed + + set_seed(42) # Deterministic output + + # Construct prompt + prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}" + inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda") + attention_mask = torch.ones(inputs.shape, device="cuda") + + # Generate model output (max 5 tokens) + outputs = self.fine_tuned_model.generate( + inputs, + attention_mask=attention_mask, + max_new_tokens=5, + num_return_sequences=1 
+ ) + result = self.tokenizer.decode(outputs[0]) + + # Extract number after "Price is $" + contents = result.split("Price is $")[1] + contents = contents.replace(',', '') + match = re.search(r"[-+]?\d*\.\d+|\d+", contents) + return float(match.group()) if match else 0 # Return parsed price or 0 if not found + + diff --git a/week8/community_contributions/lisekarimi/modal_services/get_started.py b/week8/community_contributions/lisekarimi/modal_services/get_started.py new file mode 100644 index 0000000..510d7ad --- /dev/null +++ b/week8/community_contributions/lisekarimi/modal_services/get_started.py @@ -0,0 +1,12 @@ +import sys, modal + +app = modal.App("example-hello-world") + +@app.function() +def f(i: int) -> int: + if i % 2 == 0: + print("hello", i) + else: + print("world", i, file=sys.stderr) + + return i * i diff --git a/week8/day2.0.ipynb b/week8/day2.0.ipynb index 553880e..4f3b049 100644 --- a/week8/day2.0.ipynb +++ b/week8/day2.0.ipynb @@ -44,7 +44,6 @@ "from sentence_transformers import SentenceTransformer\n", "from datasets import load_dataset\n", "import chromadb\n", - "from items import Item\n", "from sklearn.manifold import TSNE\n", "import plotly.graph_objects as go" ] @@ -77,6 +76,18 @@ "login(hf_token, add_to_git_credential=True)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8491f550-df4a-4c8f-a260-a7a419e8efb6", + "metadata": {}, + "outputs": [], + "source": [ + "# Another import after Logging in to Hugging Face - thank you Trung N.!\n", + "\n", + "from items import Item" + ] + }, { "cell_type": "markdown", "id": "3d4995a4-f67f-4871-87df-8c6439b06366", diff --git a/week8/day2.1.ipynb b/week8/day2.1.ipynb index fac26d8..3151540 100644 --- a/week8/day2.1.ipynb +++ b/week8/day2.1.ipynb @@ -44,7 +44,6 @@ "from sentence_transformers import SentenceTransformer\n", "from datasets import load_dataset\n", "import chromadb\n", - "from items import Item\n", "from sklearn.manifold import TSNE\n", "import plotly.graph_objects as go" ] @@ 
-174,7 +173,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/week8/day2.2.ipynb b/week8/day2.2.ipynb index f55ae2a..eebe634 100644 --- a/week8/day2.2.ipynb +++ b/week8/day2.2.ipynb @@ -44,7 +44,6 @@ "from sentence_transformers import SentenceTransformer\n", "from datasets import load_dataset\n", "import chromadb\n", - "from items import Item\n", "from sklearn.manifold import TSNE\n", "import plotly.graph_objects as go" ] @@ -166,7 +165,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/week8/day2.3.ipynb b/week8/day2.3.ipynb index b607e45..c2eeb34 100644 --- a/week8/day2.3.ipynb +++ b/week8/day2.3.ipynb @@ -48,7 +48,6 @@ "from sentence_transformers import SentenceTransformer\n", "from datasets import load_dataset\n", "import chromadb\n", - "from items import Item\n", "from testing import Tester" ] }, @@ -66,6 +65,31 @@ "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce73b034-9ec1-4533-ba41-3e57c7878b61", + "metadata": {}, + "outputs": [], + "source": [ + "# Log in to HuggingFace\n", + "\n", + "hf_token = os.environ['HF_TOKEN']\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c01daad-86b0-4bc0-91ba-20a64df043ed", + "metadata": {}, + "outputs": [], + "source": [ + "# Another import after Logging in to Hugging Face - thank you Trung N.!\n", + "\n", + "from items import Item" + ] + }, { "cell_type": "code", "execution_count": null, @@ -495,7 +519,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/week8/day2.4.ipynb b/week8/day2.4.ipynb index 90bff83..c315c78 
100644 --- a/week8/day2.4.ipynb +++ b/week8/day2.4.ipynb @@ -84,6 +84,31 @@ "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1006966f-96b7-4e1a-93f0-2bb9a09057c8", + "metadata": {}, + "outputs": [], + "source": [ + "# Log in to HuggingFace\n", + "\n", + "hf_token = os.environ['HF_TOKEN']\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de0e4b22-ee61-4b79-95bc-3cd707d5f83d", + "metadata": {}, + "outputs": [], + "source": [ + "# Another import after Logging in to Hugging Face - thank you Trung N.!\n", + "\n", + "from items import Item" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/week8/day4.ipynb b/week8/day4.ipynb index bb4c993..22385a8 100644 --- a/week8/day4.ipynb +++ b/week8/day4.ipynb @@ -78,7 +78,7 @@ " \n", " \n", "

Additional resource: more sophisticated planning agent

\n", - " The Planning Agent that we use in the next cell is simply a python script that calls the other Agents; frankly that's all we require for this project. But if you're intrigued to see a more Autonomous version in which we give the Planning Agent tools and allow it to decide which Agents to call, see my implementation of AutonomousPlanningAgent in my related repo, Agentic. This is an example with multiple tools that dynamically decides which function to call.\n", + " The Planning Agent that we use in the next cell is simply a python script that calls the other Agents; frankly that's all we require for this project. But if you're intrigued to see a more Autonomous version in which we give the Planning Agent tools and allow it to decide which Agents to call, see my implementation of AutonomousPlanningAgent in my related repo, Agentic. This is an example with multiple tools that dynamically decides which function to call.\n", " \n", " \n", " \n", @@ -144,7 +144,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4,