From 3e0d6de0e1986d5cc6eb61f75740a992fc196a1e Mon Sep 17 00:00:00 2001 From: Ransford Okpoti Date: Tue, 28 Oct 2025 21:27:57 +0000 Subject: [PATCH] supervised fine-tuning with openai --- .../ranskills-week6-fine-tuning-openai.ipynb | 2520 +++++++++++++++++ 1 file changed, 2520 insertions(+) create mode 100644 week6/community-contributions/ranskills-week6-fine-tuning-openai.ipynb diff --git a/week6/community-contributions/ranskills-week6-fine-tuning-openai.ipynb b/week6/community-contributions/ranskills-week6-fine-tuning-openai.ipynb new file mode 100644 index 0000000..a73f678 --- /dev/null +++ b/week6/community-contributions/ranskills-week6-fine-tuning-openai.ipynb @@ -0,0 +1,2520 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "41fb78a4-5aa1-4288-9cc2-6f742062f0a3", + "metadata": { + "id": "41fb78a4-5aa1-4288-9cc2-6f742062f0a3" + }, + "source": [ + "# Fine Tuning with Frontier Models" + ] + }, + { + "cell_type": "markdown", + "id": "f8d0713f-0f79-460f-8acb-47afb877d24a", + "metadata": { + "id": "f8d0713f-0f79-460f-8acb-47afb877d24a", + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Utility" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cdfe762-3200-4459-981e-0ded7c14b4de", + "metadata": { + "id": "2cdfe762-3200-4459-981e-0ded7c14b4de" + }, + "outputs": [], + "source": [ + "# Constants - used for printing to stdout in color\n", + "\n", + "GREEN = \"\\033[92m\"\n", + "YELLOW = \"\\033[93m\"\n", + "RED = \"\\033[91m\"\n", + "RESET = \"\\033[0m\"\n", + "COLOR_MAP = {\"red\":RED, \"orange\": YELLOW, \"green\": GREEN}" + ] + }, + { + "cell_type": "markdown", + "id": "d9f325d5-fb67-475c-aca0-01c0f0ea5ec1", + "metadata": { + "id": "d9f325d5-fb67-475c-aca0-01c0f0ea5ec1", + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### Item" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0832e74b-2779-4822-8e6c-4361ec165c7f", + "metadata": { + "id": "0832e74b-2779-4822-8e6c-4361ec165c7f" + }, + "outputs": [], + "source": [ + "from typing import Optional\n", + "from transformers import AutoTokenizer\n", + "import re\n", + "\n", + "BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n", + "\n", + "MIN_TOKENS = 150 # Any less than this, and we don't have enough useful content\n", + "MAX_TOKENS = 160 # Truncate after this many tokens. Then after adding in prompt text, we will get to around 180 tokens\n", + "\n", + "MIN_CHARS = 300\n", + "CEILING_CHARS = MAX_TOKENS * 7\n", + "\n", + "class Item:\n", + " \"\"\"\n", + " An Item is a cleaned, curated datapoint of a Product with a Price\n", + " \"\"\"\n", + "\n", + " tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + " PREFIX = \"Price is $\"\n", + " QUESTION = \"How much does this cost to the nearest dollar?\"\n", + " REMOVALS = ['\"Batteries Included?\": \"No\"', '\"Batteries Included?\": \"Yes\"', '\"Batteries Required?\": \"No\"', '\"Batteries Required?\": \"Yes\"', \"By Manufacturer\", \"Item\", \"Date First\", \"Package\", \":\", \"Number of\", \"Best Sellers\", \"Number\", \"Product \"]\n", + "\n", + " title: str\n", + " price: float\n", + " category: str\n", + " token_count: int = 0\n", + " details: Optional[str]\n", + " prompt: Optional[str] = None\n", + " include = False\n", + "\n", + " def __init__(self, data, price):\n", + " self.title = data['title']\n", + " self.price = price\n", + " self.parse(data)\n", + "\n", + " def scrub_details(self):\n", + " \"\"\"\n", + " Clean up the details string by removing common text that doesn't add value\n", + " \"\"\"\n", + " details = self.details\n", + " for remove in self.REMOVALS:\n", + " details = details.replace(remove, \"\")\n", + " return details\n", + "\n", + " def scrub(self, stuff):\n", + " \"\"\"\n", + " Clean up the provided text by removing unnecessary characters and whitespace\n", + " Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers\n", + " \"\"\"\n", + " stuff = re.sub(r'[:\\[\\]\"{}【】\\s]+', ' ', stuff).strip()\n", + " stuff = stuff.replace(\" ,\", \",\").replace(\",,,\",\",\").replace(\",,\",\",\")\n", + " words = stuff.split(' ')\n", + " select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)]\n", + " return \" \".join(select)\n", + "\n", + " def parse(self, data):\n", + " \"\"\"\n", + " Parse this datapoint and if it fits within the allowed Token range,\n", + " then set include to True\n", + " \"\"\"\n", + " contents = '\\n'.join(data['description'])\n", + " if contents:\n", + " contents += '\\n'\n", + " features = '\\n'.join(data['features'])\n", + " if features:\n", + " contents += features + '\\n'\n", + " self.details = data['details']\n", + " if self.details:\n", + " contents += self.scrub_details() + '\\n'\n", + " if len(contents) > MIN_CHARS:\n", + " contents = contents[:CEILING_CHARS]\n", + " text = f\"{self.scrub(self.title)}\\n{self.scrub(contents)}\"\n", + " tokens = self.tokenizer.encode(text, add_special_tokens=False)\n", + " if len(tokens) > MIN_TOKENS:\n", + " tokens = tokens[:MAX_TOKENS]\n", + " text = self.tokenizer.decode(tokens)\n", + " self.make_prompt(text)\n", + " self.include = True\n", + "\n", + " def make_prompt(self, text):\n", + " \"\"\"\n", + " Set the prompt instance variable to be a prompt appropriate for training\n", + " \"\"\"\n", + " self.prompt = f\"{self.QUESTION}\\n\\n{text}\\n\\n\"\n", + " self.prompt += f\"{self.PREFIX}{str(round(self.price))}.00\"\n", + " self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))\n", + "\n", + " def test_prompt(self):\n", + " \"\"\"\n", + " Return a prompt suitable for testing, with the actual price removed\n", + " \"\"\"\n", + " return self.prompt.split(self.PREFIX)[0] + self.PREFIX\n", + "\n", + " def __repr__(self):\n", + " \"\"\"\n", + " Return a String version of this Item\n", + " \"\"\"\n", + " return f\"<{self.title} = ${self.price}>\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "LaIwYGzItsEi", + "metadata": { + "id": "LaIwYGzItsEi" + }, + "source": [ + "### Tester" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "129470d7-a5b1-4851-8800-970cccc8bcf5", + "metadata": { + "id": "129470d7-a5b1-4851-8800-970cccc8bcf5" + }, + "outputs": [], + "source": [ + "class Tester:\n", + "\n", + " def __init__(self, predictor, data, title=None, size=250):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or predictor.__name__.replace(\"_\", \" \").title()\n", + " self.size = size\n", + " self.guesses = []\n", + " self.truths = []\n", + " self.errors = []\n", + " self.sles = []\n", + " self.colors = []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error<40 or error/truth < 0.2:\n", + " return \"green\"\n", + " elif error<80 or error/truth < 0.4:\n", + " return \"orange\"\n", + " else:\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " datapoint = self.data[i]\n", + " guess = self.predictor(datapoint)\n", + " truth = datapoint.price\n", + " error = abs(guess - truth)\n", + " log_error = math.log(truth+1) - math.log(guess+1)\n", + " sle = log_error ** 2\n", + " color = self.color_for(error, truth)\n", + " title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+\"...\"\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n", + "\n", + " def chart(self, title):\n", + " max_error = max(self.errors)\n", + " plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Model Estimate')\n", + " plt.xlim(0, max_val)\n", + " plt.ylim(0, max_val)\n", + " plt.title(title)\n", + " plt.show()\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for color in self.colors if color==\"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n", + " self.chart(title)\n", + "\n", + " def run(self):\n", + " self.error = 0\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, function, data):\n", + " cls(function, data).run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6XywRUiUro69", + "metadata": { + "id": "6XywRUiUro69" + }, + "outputs": [], + "source": [ + "# A utility function to extract the price from a string\n", + "\n", + "def get_price(s):\n", + " s = s.replace('$','').replace(',','')\n", + " match = re.search(r'[-+]?\\d*\\.?\\d+', s) # Simplify regex\n", + " return float(match.group()) if match else 0" + ] + }, + { + "cell_type": "markdown", + "id": "10af1228-30b7-4dfc-a364-059ea099af81", + "metadata": { + "id": "10af1228-30b7-4dfc-a364-059ea099af81" + }, + "source": [ + "## Data Curation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5faa087c-bdf7-42e5-9c32-c0b0a4d4160f", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5faa087c-bdf7-42e5-9c32-c0b0a4d4160f", + "outputId": "b21530be-718f-4bed-aa23-16227f8a92c0" + }, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet jupyterlab ipython ipywidgets huggingface_hub datasets transformers\n", + "\n", + "%matplotlib notebook\n" + ] + }, + { + "cell_type": "markdown", + "id": "3XTxVhq0xC8Z", + "metadata": { + "id": "3XTxVhq0xC8Z" + }, + "source": [ + "### Load from Hugging Face" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bd6fc25-77c4-47a6-a2d2-ce80403f3c22", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113, + "referenced_widgets": [ + "cb25b81f26f14748a0496f1dcb8a4079", + "f902469b330c44ecb0b5a27892543a62", + "d4a96323719e4b19877dab1f20df76c2", + "77adf1d23ac84b0d9465afdd5d604f12", + "8549a500ad1e46cdbc8e839bec7fb2d2", + "0e5f4a86c9fb497198dba931ae3f5e34", + "6f4d626b3d414744a420da9e2af420f7", + "801e742fd5a94edbb698d80266ff0a12", + "675e6aa4d44148afb3f8f6c55e94807b", + "c29c01940e1040af8ac9d4c2d9a5d4e5", + "9a6cf9fb89184f3db6f4524533500025", + "7cba9a5cce3a4b899c8bfd0a3c6c8413", + "3e2642a0bb8a48bb8861252b8d221893", + "c0ec87cef19e49989d6379ea7e63e7fd", + "a781882db7ad49a0ae8cd4614754d2b6", + "c45b2735d3864dd9ab87628bea74f618", + "8dde01b656894092bb33d691e4dbe49b", + "4b5b7e98540840c69fdda8e3a9ce8947", + "5a642ca6bc8b4a52b83e9ce708f7561a", + "eff7608645cc450f9dc5a69bc96839c8", + "82ce2f089eba44e5ba17db5afc1729e7", + "7ec30523eb084f1abdd5002173780c15", + "796cb9a1e7154922a9235070b4eb0e83", + "58aed351dc524306909d796fb7a4b511", + "d55b5af25c5f4eb2bce121ea61811ebe", + "a30ce766b7444d9d9910a64538eee263", + "07d582edd37f41298d6be880b2c09fac", + "e5f3117210224c008ef84f8879437510", + "a525cd4b3b794a208d634891ffb1d334", + "ed265ba4024b45d9a3b6fd5cbb2a00c6", + "14dcae832c584937b0dc7bc7e17f7517", + "66ced28daca8492b8eeabfbcc62eec0f", + "7da27317676b462c9ba143f47b336cab" + ] + }, + "id": "2bd6fc25-77c4-47a6-a2d2-ce80403f3c22", + "outputId": "eed4636b-5c2a-4f7d-8283-af38b6baa213" + }, + "outputs": [], + "source": [ + "from datasets import load_dataset, Dataset, DatasetDict\n", + "from transformers import AutoTokenizer\n", + "\n", + "\n", + "dataset = load_dataset('ranskills/Amazon-Reviews-2023-raw_meta_All_Beauty', split='full')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b66b59c2-80b2-4d47-b739-c59423cf9d7d", + "metadata": { + "id": "b66b59c2-80b2-4d47-b739-c59423cf9d7d" + }, + "outputs": [], + "source": [ + "from IPython.display import display, JSON\n", + "\n", + "\n", + "print(f'Number of datapoints: {dataset.num_rows:,}')\n", + "display(JSON(dataset.features.to_dict()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9620ed3-205e-48ee-b67a-e56b30bf6b6b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 67, + "referenced_widgets": [ + "fa83d76f75034101a2a531b3244ed61b", + "240f07ca195141bd973b34ac7ff5bc69", + "191aeaabe850445d925fc686a5030919", + "f30dd057a4394b1bb555427244efc8be", + "225e7ee50f624dc3bbaf682e1140e0d4", + "886cf1b0c3464afbbaf5c6a9a8d383f0", + "3c4f8e4a71b2405080eecc702fa51091", + "bb5cc6df3f1a4ab6add74bd40b654d2f", + "974e53ceb3bf4739bf8d5e497b9a58f1", + "1abdc3eabae647a784faf50ca04a4664", + "36f1ab21cb4842f5bb5ece32772ff57b" + ] + }, + "id": "e9620ed3-205e-48ee-b67a-e56b30bf6b6b", + "outputId": "c573a50b-b6b0-42a3-b3e6-97bff7a1c872" + }, + "outputs": [], + "source": [ + "def non_zero_price_filter(datapoint: dict):\n", + " try:\n", + " price = float(datapoint['price'])\n", + " return price > 0\n", + " except:\n", + " return False\n", + "\n", + "filtered_dataset = dataset.filter(non_zero_price_filter)\n", + "\n", + "print(f'Prices with non-zero prices:{filtered_dataset.num_rows:,} = {filtered_dataset.num_rows / dataset.num_rows * 100:,.2f}%')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "834a3c4b-fc9c-4bc7-b6b9-bdf7e8d6d585", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "834a3c4b-fc9c-4bc7-b6b9-bdf7e8d6d585", + "outputId": "5e2a46bb-3ca1-4727-9293-62877e0161b8" + }, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "data = defaultdict(lambda: [])\n", + "for datapoint in filtered_dataset:\n", + " price = float(datapoint['price'])\n", + " contents = datapoint[\"title\"] + str(datapoint[\"description\"]) + str(datapoint[\"features\"]) + str(datapoint[\"details\"])\n", + "\n", + " data['price'].append(price)\n", + " data['characters'].append(len(contents))\n", + "\n", + "%matplotlib inline\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "combined_describe = pd.concat(\n", + " [df['price'].describe(), df['characters'].describe()],\n", + " axis=1\n", + ")\n", + "\n", + "display(combined_describe)\n", + "\n", + "prices = data['price']\n", + "lengths = data['characters']\n", + "\n", + "plt.figure(figsize=(15, 6))\n", + "plt.title(f\"Prices: Avg {df['price'].mean():,.2f} and highest {df['price'].max():,}\\n\")\n", + "plt.xlabel('Length (chars)')\n", + "plt.ylabel('Count')\n", + "plt.hist(prices, rwidth=0.7, color=\"orange\", bins=range(0, 300, 10))\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(15, 6))\n", + "plt.title(f\"Characters: Avg {sum(lengths)/len(lengths):,.0f} and highest {max(lengths):,}\\n\")\n", + "plt.xlabel('Length (characters)')\n", + "plt.ylabel('Count')\n", + "plt.hist(lengths, rwidth=0.7, color=\"lightblue\", bins=range(0, 2500, 50))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a506f42c-81c0-4198-bc0b-1e0653620be8", + "metadata": { + "id": "a506f42c-81c0-4198-bc0b-1e0653620be8" + }, + "outputs": [], + "source": [ + "BASE_MODEL = 'meta-llama/Meta-Llama-3.1-8B'\n", + "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)\n", + "\n", + "tokenizer.encode('114', add_special_tokens=False)\n", + "\n", + "items = []\n", + "for datapoint in filtered_dataset:\n", + " price = float(datapoint['price'])\n", + " items.append(Item(datapoint, price))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5842ace6-332d-46da-a853-5ea5a2a1cf88", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5842ace6-332d-46da-a853-5ea5a2a1cf88", + "outputId": "172d3a7e-5f0f-4424-ddcd-1ed909e9e02c" + }, + "outputs": [], + "source": [ + "print(items[0].test_prompt())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42ee0099-0d2a-4331-a01c-3462363a6987", + "metadata": { + "id": "42ee0099-0d2a-4331-a01c-3462363a6987" + }, + "outputs": [], + "source": [ + "# filter out items with None prompt as a result of their content being below the minimum threshold\n", + "valid_items = [item for item in items if item.prompt is not None]\n", + "\n", + "data_size = len(valid_items)\n", + "\n", + "\n", + "training_size = int(data_size * 0.9)\n", + "train = valid_items[:training_size]\n", + "test = valid_items[training_size:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1146d5a2-f93e-4fe9-864e-4ce7e01e257b", + "metadata": { + "id": "1146d5a2-f93e-4fe9-864e-4ce7e01e257b" + }, + "outputs": [], + "source": [ + "train_prompts = [item.prompt for item in train]\n", + "train_prices = [item.price for item in train]\n", + "test_prompts = [item.test_prompt() for item in test]\n", + "test_prices = [item.price for item in test]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31ca360d-5fc6-487a-91c6-d61758b2ff16", + "metadata": { + "id": "31ca360d-5fc6-487a-91c6-d61758b2ff16" + }, + "outputs": [], + "source": [ + "# Create a Dataset from the lists\n", + "\n", + "train_dataset = Dataset.from_dict({\"text\": train_prompts, \"price\": train_prices})\n", + "test_dataset = Dataset.from_dict({\"text\": test_prompts, \"price\": test_prices})\n", + "dataset = DatasetDict({\n", + " \"train\": train_dataset,\n", + " \"test\": test_dataset\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "05e6ca7e-bf40-49f9-bffb-a5b22e5800d8", + "metadata": { + "id": "05e6ca7e-bf40-49f9-bffb-a5b22e5800d8" + }, + "source": [ + "### Export Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0ff2fe3-78bf-49e3-a682-6a46742d010c", + "metadata": { + "id": "b0ff2fe3-78bf-49e3-a682-6a46742d010c" + }, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "DATA_DIR = 'data'\n", + "\n", + "train_storage_file = lambda ext: f'{DATA_DIR}/all_beauty_train{ext}'\n", + "test_storage_file = lambda ext: f'{DATA_DIR}/all_beauty_test{ext}'\n", + "\n", + "with open(train_storage_file('.pkl'), 'wb') as file:\n", + " pickle.dump(train, file)\n", + "\n", + "with open(test_storage_file('.pkl'), 'wb') as file:\n", + " pickle.dump(test, file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2164662-9bc9-4a66-9e4e-a8a955a45753", + "metadata": { + "id": "b2164662-9bc9-4a66-9e4e-a8a955a45753", + "outputId": "7bd7ff39-93d6-4886-f223-22fc36634828" + }, + "outputs": [], + "source": [ + "dataset['train'].to_parquet(train_storage_file('.parquet'))\n", + "dataset['test'].to_parquet(test_storage_file('.parquet'))\n", + "\n", + "# How to load back the data\n", + "# loaded_dataset = load_dataset(\"parquet\", data_files='amazon_polarity_train.parquet')" + ] + }, + { + "cell_type": "markdown", + "id": "6fe428a2-41c4-4f7f-a43f-e8ba2f344013", + "metadata": { + "id": "6fe428a2-41c4-4f7f-a43f-e8ba2f344013" + }, + "source": [ + "### Predictions" + ] + }, + { + "cell_type": "markdown", + "id": "qX0c_prppnyZ", + "metadata": { + "id": "qX0c_prppnyZ" + }, + "source": [ + "#### Random Pricer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7323252b-db50-4b8a-a7fc-8504bb3d218b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "7323252b-db50-4b8a-a7fc-8504bb3d218b", + "outputId": "5a2d6306-9709-4028-a83c-33170c629739" + }, + "outputs": [], + "source": [ + "import random\n", + "import math\n", + "\n", + "\n", + "def random_pricer(item):\n", + " return random.randrange(1,200)\n", + "\n", + "random.seed(42)\n", + "\n", + "# Run our TestRunner\n", + "Tester.test(random_pricer, test)" + ] + }, + { + "cell_type": "markdown", + "id": "O0xVXRXkp9sQ", + "metadata": { + "id": "O0xVXRXkp9sQ" + }, + "source": [ + "#### Constant Pricer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a932b0e-ba6e-45d2-8436-b740c3681272", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "6a932b0e-ba6e-45d2-8436-b740c3681272", + "outputId": "d6ee472e-7a10-4eac-ce5f-3ddd755f4f21" + }, + "outputs": [], + "source": [ + "training_prices = [item.price for item in train]\n", + "training_average = sum(training_prices) / len(training_prices)\n", + "\n", + "def constant_pricer(item):\n", + " return training_average\n", + "\n", + "Tester.test(constant_pricer, test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3410bd4-98e4-42a6-a702-4423cfd034b4", + "metadata": { + "id": "d3410bd4-98e4-42a6-a702-4423cfd034b4", + "outputId": "2e57a75a-4873-4207-e28f-9d51a6359e56" + }, + "outputs": [], + "source": [ + "train[0].details" + ] + }, + { + "cell_type": "markdown", + "id": "44537051-7b4e-4b8c-95a7-a989ea51e517", + "metadata": { + "id": "44537051-7b4e-4b8c-95a7-a989ea51e517" + }, + "source": [ + "### Prepare Fine-Tuning Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47d03b0b-4a93-4f9d-80ac-10f3fc11ccec", + "metadata": { + "id": "47d03b0b-4a93-4f9d-80ac-10f3fc11ccec" + }, + "outputs": [], + "source": [ + "fine_tune_train = train[:100]\n", + "fine_tune_validation = train[100:125]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d7b6f35-890c-4227-8990-6b62694a332d", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4d7b6f35-890c-4227-8990-6b62694a332d", + "outputId": "43deeec1-bde8-4651-9ce8-6c4b4aa39f8a" + }, + "outputs": [], + "source": [ + "def messages_for(item):\n", + " system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n", + " user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n", + " ]\n", + "\n", + "messages_for(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a6e06f3-614f-4687-bd43-9ac03aaface8", + "metadata": { + "id": "1a6e06f3-614f-4687-bd43-9ac03aaface8" + }, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "DATA_DIR = 'data'\n", + "\n", + "data_path = Path(DATA_DIR)\n", + "\n", + "def make_jsonl(items):\n", + " result = \"\"\n", + " for item in items:\n", + " messages = messages_for(item)\n", + " messages_str = json.dumps(messages)\n", + " result += '{\"messages\": ' + messages_str +'}\\n'\n", + " return result.strip()\n", + "\n", + "# print(make_jsonl(train[:3]))\n", + "data_path.absolute()\n", + "if not data_path.exists():\n", + " data_path.mkdir(parents=True)\n", + "\n", + "\n", + "\n", + "train_jsonl_path = f'{data_path}/pricer_train.jsonl'\n", + "validation_jsonl_path = f'{data_path}/pricer_validation.jsonl'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8dda552-8003-4fdc-b36a-7d0afa9b0b42", + "metadata": { + "id": "d8dda552-8003-4fdc-b36a-7d0afa9b0b42" + }, + "outputs": [], + "source": [ + "def write_jsonl(items, filename):\n", + " with open(filename, \"w\") as f:\n", + " jsonl = make_jsonl(items)\n", + " f.write(jsonl)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "189e959c-d70c-4509-bff6-1cbd8e8db637", + "metadata": { + "id": "189e959c-d70c-4509-bff6-1cbd8e8db637" + }, + "outputs": [], + "source": [ + "\n", + "write_jsonl(fine_tune_train, train_jsonl_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b1480e2-ed19-4d0e-bc5d-a00086d104a2", + "metadata": { + "id": "6b1480e2-ed19-4d0e-bc5d-a00086d104a2" + }, + "outputs": [], + "source": [ + "write_jsonl(fine_tune_validation, validation_jsonl_path)" + ] + }, + { + "cell_type": "markdown", + "id": "ga-f4JK7sPU2", + "metadata": { + "id": "ga-f4JK7sPU2" + }, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de958a51-69ba-420c-84b7-d32765898fd2", + "metadata": { + "id": "de958a51-69ba-420c-84b7-d32765898fd2" + }, + "outputs": [], + "source": [ + "import os\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "from google.colab import userdata\n", + "\n", + "load_dotenv()\n", + "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n", + "\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "QFDAoNnoRCk1", + "metadata": { + "id": "QFDAoNnoRCk1" + }, + "outputs": [], + "source": [ + "with open(train_jsonl_path, 'rb') as f:\n", + " train_file = openai.files.create(file=f, purpose='fine-tune')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "kBVWisusQwDq", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kBVWisusQwDq", + "outputId": "79dbe38a-cb76-4b8d-bd13-95b2f5ed8270" + }, + "outputs": [], + "source": [ + "train_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "wgth1KvMSEOb", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wgth1KvMSEOb", + "outputId": "9191cae8-ae86-4db1-b7cb-02c53add139c" + }, + "outputs": [], + "source": [ + "with open(validation_jsonl_path, 'rb') as f:\n", + " validation_file = openai.files.create(file=f, purpose='fine-tune')\n", + "\n", + "validation_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "-ohEia37Sjtx", + "metadata": { + "id": "-ohEia37Sjtx" + }, + "outputs": [], + "source": [ + "wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer\"}}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "g7uz8SC5S3_s", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g7uz8SC5S3_s", + "outputId": "751e5087-8dcb-4dcb-f9eb-5b95f28c828c" + }, + "outputs": [], + "source": [ + "openai.fine_tuning.jobs.create(\n", + " training_file=train_file.id,\n", + " validation_file=validation_file.id,\n", + " model=\"gpt-4o-mini-2024-07-18\",\n", + " seed=42,\n", + " hyperparameters={\"n_epochs\": 1},\n", + " integrations = [wandb_integration],\n", + " suffix=\"pricer\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "_zHswJwzWCHZ", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_zHswJwzWCHZ", + "outputId": "a6899370-4332-4445-d2fd-b0e9a3140d78" + }, + "outputs": [], + "source": [ + "openai.fine_tuning.jobs.list(limit=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "rSHYkQojWH8Q", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "rSHYkQojWH8Q", + "outputId": "a52772de-12bb-460c-bed7-2ef2d21892ee" + }, + "outputs": [], + "source": [ + "job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id\n", + "job_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Yqq-jd1yWMuO", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yqq-jd1yWMuO", + "outputId": "0a758e9e-d7e5-4c44-fcef-5969352729f9" + }, + "outputs": [], + "source": [ + "openai.fine_tuning.jobs.retrieve(job_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37BH0u-QWOiY", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "37BH0u-QWOiY", + "outputId": "7d8b3ff3-82db-47c9-80c3-b3c1de60e469" + }, + "outputs": [], + "source": [ + "openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2nNSE_AzWYMq", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 955 + }, + "id": "2nNSE_AzWYMq", + "outputId": "ac6904ff-28f8-4c72-a064-8c1b72437e49" + }, + "outputs": [], + "source": [ + "import wandb\n", + "from wandb.integration.openai.fine_tuning import WandbLogger\n", + "\n", + "# Log in to Weights & Biases.\n", + "wandb.login()\n", + "# Sync the fine-tuning job with Weights & Biases.\n", + "WandbLogger.sync(fine_tune_job_id=job_id, project=\"gpt-pricer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ASiJUw-Fh8Ul", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "ASiJUw-Fh8Ul", + "outputId": "037b1430-cffe-4c4d-b0a6-38262f1fecd4" + }, + "outputs": [], + "source": [ + "fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model\n", + "fine_tuned_model_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7jB_7gqBiH_r", + "metadata": { + "id": "7jB_7gqBiH_r" + }, + "outputs": [], + "source": [ + "def messages_for(item):\n", + " system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n", + " user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " {\"role\": \"assistant\", \"content\": \"Price is $\"}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "BHfLSadhiVQE", + "metadata": { + "id": "BHfLSadhiVQE" + }, + "outputs": [], + "source": [ + "# The function for gpt-4o-mini\n", + "\n", + "def gpt_fine_tuned(item):\n", + " response = openai.chat.completions.create(\n", + " model=fine_tuned_model_name,\n", + " messages=messages_for(item),\n", + " seed=42,\n", + " max_tokens=7\n", + " )\n", + " reply = response.choices[0].message.content\n", + " return get_price(reply)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "C0CiTZ4jkjrI", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "C0CiTZ4jkjrI", + "outputId": "640299e8-ebeb-4562-bcd5-d3bab726e557" + }, + "outputs": [], + "source": [ + "print(test[0].price)\n", + "print(gpt_fine_tuned(test[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "WInQE0ObkuBl", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "WInQE0ObkuBl", + "outputId": "bdff7207-6ecf-489a-c231-ed44a131967a" + }, + "outputs": [], + "source": [ + "Tester.test(gpt_fine_tuned, test)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "sagemaker-distribution:Python", + "language": "python", + "name": "conda-env-sagemaker-distribution-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "07d582edd37f41298d6be880b2c09fac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e5f4a86c9fb497198dba931ae3f5e34": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14dcae832c584937b0dc7bc7e17f7517": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "191aeaabe850445d925fc686a5030919": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bb5cc6df3f1a4ab6add74bd40b654d2f", + "max": 112590, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_974e53ceb3bf4739bf8d5e497b9a58f1", + "value": 112590 + } + }, + "1abdc3eabae647a784faf50ca04a4664": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "225e7ee50f624dc3bbaf682e1140e0d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "240f07ca195141bd973b34ac7ff5bc69": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_886cf1b0c3464afbbaf5c6a9a8d383f0", + "placeholder": "​", + "style": "IPY_MODEL_3c4f8e4a71b2405080eecc702fa51091", + "value": "Filter: 100%" + } + }, + "36f1ab21cb4842f5bb5ece32772ff57b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3c4f8e4a71b2405080eecc702fa51091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3e2642a0bb8a48bb8861252b8d221893": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8dde01b656894092bb33d691e4dbe49b", + "placeholder": "​", + "style": "IPY_MODEL_4b5b7e98540840c69fdda8e3a9ce8947", + "value": "data/full-00000-of-00001.parquet: 100%" + } + }, + "4b5b7e98540840c69fdda8e3a9ce8947": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "58aed351dc524306909d796fb7a4b511": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e5f3117210224c008ef84f8879437510", + "placeholder": "​", + "style": "IPY_MODEL_a525cd4b3b794a208d634891ffb1d334", + "value": "Generating full split: 100%" + } + }, + "5a642ca6bc8b4a52b83e9ce708f7561a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66ced28daca8492b8eeabfbcc62eec0f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "675e6aa4d44148afb3f8f6c55e94807b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6f4d626b3d414744a420da9e2af420f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77adf1d23ac84b0d9465afdd5d604f12": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c29c01940e1040af8ac9d4c2d9a5d4e5", + "placeholder": "​", + "style": "IPY_MODEL_9a6cf9fb89184f3db6f4524533500025", + "value": " 1.12k/? [00:00<00:00, 65.7kB/s]" + } + }, + "796cb9a1e7154922a9235070b4eb0e83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_58aed351dc524306909d796fb7a4b511", + "IPY_MODEL_d55b5af25c5f4eb2bce121ea61811ebe", + "IPY_MODEL_a30ce766b7444d9d9910a64538eee263" + ], + "layout": "IPY_MODEL_07d582edd37f41298d6be880b2c09fac" + } + }, + "7cba9a5cce3a4b899c8bfd0a3c6c8413": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3e2642a0bb8a48bb8861252b8d221893", + "IPY_MODEL_c0ec87cef19e49989d6379ea7e63e7fd", + "IPY_MODEL_a781882db7ad49a0ae8cd4614754d2b6" + ], + "layout": "IPY_MODEL_c45b2735d3864dd9ab87628bea74f618" + } + }, + "7da27317676b462c9ba143f47b336cab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7ec30523eb084f1abdd5002173780c15": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "801e742fd5a94edbb698d80266ff0a12": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "82ce2f089eba44e5ba17db5afc1729e7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8549a500ad1e46cdbc8e839bec7fb2d2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "886cf1b0c3464afbbaf5c6a9a8d383f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8dde01b656894092bb33d691e4dbe49b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "974e53ceb3bf4739bf8d5e497b9a58f1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9a6cf9fb89184f3db6f4524533500025": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a30ce766b7444d9d9910a64538eee263": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_66ced28daca8492b8eeabfbcc62eec0f", + "placeholder": "​", + "style": "IPY_MODEL_7da27317676b462c9ba143f47b336cab", + "value": " 112590/112590 [00:01<00:00, 122905.08 examples/s]" + } + }, + "a525cd4b3b794a208d634891ffb1d334": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a781882db7ad49a0ae8cd4614754d2b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_82ce2f089eba44e5ba17db5afc1729e7", + "placeholder": "​", + "style": "IPY_MODEL_7ec30523eb084f1abdd5002173780c15", + "value": " 59.6M/59.6M [00:01<00:00, 41.6MB/s]" + } + }, + "bb5cc6df3f1a4ab6add74bd40b654d2f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c0ec87cef19e49989d6379ea7e63e7fd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5a642ca6bc8b4a52b83e9ce708f7561a", + "max": 59635138, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_eff7608645cc450f9dc5a69bc96839c8", + "value": 59635138 + } + }, + "c29c01940e1040af8ac9d4c2d9a5d4e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c45b2735d3864dd9ab87628bea74f618": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb25b81f26f14748a0496f1dcb8a4079": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f902469b330c44ecb0b5a27892543a62", + "IPY_MODEL_d4a96323719e4b19877dab1f20df76c2", + "IPY_MODEL_77adf1d23ac84b0d9465afdd5d604f12" + ], + "layout": "IPY_MODEL_8549a500ad1e46cdbc8e839bec7fb2d2" + } + }, + "d4a96323719e4b19877dab1f20df76c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_801e742fd5a94edbb698d80266ff0a12", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_675e6aa4d44148afb3f8f6c55e94807b", + "value": 1 + } + }, + "d55b5af25c5f4eb2bce121ea61811ebe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed265ba4024b45d9a3b6fd5cbb2a00c6", + "max": 112590, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_14dcae832c584937b0dc7bc7e17f7517", + "value": 112590 + } + }, + "e5f3117210224c008ef84f8879437510": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ed265ba4024b45d9a3b6fd5cbb2a00c6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eff7608645cc450f9dc5a69bc96839c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f30dd057a4394b1bb555427244efc8be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1abdc3eabae647a784faf50ca04a4664", + "placeholder": "​", + "style": "IPY_MODEL_36f1ab21cb4842f5bb5ece32772ff57b", + "value": " 112590/112590 [00:13<00:00, 12100.56 examples/s]" + } + }, + "f902469b330c44ecb0b5a27892543a62": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0e5f4a86c9fb497198dba931ae3f5e34", + "placeholder": "​", + "style": "IPY_MODEL_6f4d626b3d414744a420da9e2af420f7", + "value": "README.md: " + } + }, + "fa83d76f75034101a2a531b3244ed61b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_240f07ca195141bd973b34ac7ff5bc69", + "IPY_MODEL_191aeaabe850445d925fc686a5030919", + "IPY_MODEL_f30dd057a4394b1bb555427244efc8be" + ], + "layout": "IPY_MODEL_225e7ee50f624dc3bbaf682e1140e0d4" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}