From 3b762397c326a75364253cb378557d3519abd5f3 Mon Sep 17 00:00:00 2001 From: Saurabh Gupta Date: Sat, 4 Oct 2025 21:59:36 -0400 Subject: [PATCH 1/5] Week 4, Day 5 stock buy or sell recommender using gpt and streaming. --- .../day5_stock_analysis_recommender.ipynb | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 week4/community-contributions/day5_stock_analysis_recommender.ipynb diff --git a/week4/community-contributions/day5_stock_analysis_recommender.ipynb b/week4/community-contributions/day5_stock_analysis_recommender.ipynb new file mode 100644 index 0000000..b99098a --- /dev/null +++ b/week4/community-contributions/day5_stock_analysis_recommender.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b65e507f", + "metadata": {}, + "source": [ + "Stock Analysis Recommender. This solution can be extended with a real-time API for multiple stock symbols. In this example, the analysis is shown for a single stock, IBM." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d159a754", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import json\n", + "import requests\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import google.generativeai\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "325bafb1", + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n", + "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b6dab667", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize\n", + "\n", + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "OPENAI_MODEL = 
\"gpt-4o\"\n", + "CLAUDE_MODEL = \"claude-3-7-sonnet-20250219\"" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7f24ac3b", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"\"\"You are an assistant that recommends whether to buy or sell a stock that user provides. Give your best educated guess on the basis of the stock\n", + " details provided to you like stock price, open, close, high, low price and technical indicators like SMA, EMA and RSI. Don't force an answer, only give the answer if the data you \n", + " receive is valid. If not valid or if you receive empty data, tell the user that the data received was not valid politely. Respond only in Markdown \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "30283e3d", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(data):\n", + " user_prompt = f\"\"\" Recommend whether to buy or sell stock of IBM tomorrow. Here is the past 100 days of stock prices data which includes the stock\n", + " open, close, high and low price for the day.\\n\\n {data['stock_data']} \\n\\n. 
\n", + " And here is the technical indicators data for the stock for past 52 weeks.\\n\\n\n", + " Indicator Name: SMA\n", + " Indicator Data: {data['SMA']} \\n\\n\n", + " Indicator Name: EMA\n", + " Indicator Data: {data['EMA']} \\n\\n\n", + " Indicator Name: RSI\n", + " Indicator Data: {data['RSI']} \\n\\n\n", + " Analyze all this information and give your best educated guess based on the above data provided.\n", + " \"\"\"\n", + " \n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "48d3c50d", + "metadata": {}, + "outputs": [], + "source": [ + "def fetchData(stock_name):\n", + " data = {}\n", + " # Hardcoding Stock Data because the API costs are massive.\n", + " data['stock_name'] = stock_name\n", + "\n", + " data['stock_data'] = stock_data[\"Time Series (Daily)\"]\n", + "\n", + " technical_sma_data = sma_data['Technical Analysis: SMA']\n", + " technical_ema_data = ema_data['Technical Analysis: EMA']\n", + "\n", + " technical_rsi_data = rsi_data['Technical Analysis: RSI']\n", + " \n", + " data['SMA'] = technical_sma_data\n", + " data['EMA'] = technical_ema_data\n", + " data['RSI'] = technical_rsi_data\n", + "\n", + " return data\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "02a9f901", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(stock):\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(fetchData(stock))}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "616413e5", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(stock): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(stock), stream=True)\n", + " reply = \"\"\n", + " \n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " reply = 
reply.replace(\"```\",\"\").replace(\"markdown\",\"\")\n", + " update_display(Markdown(reply), display_id=display_handle.display_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "c4f6ec43", + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Based on the data provided for IBM's stock, here are a few considerations:\n", + "\n", + "1. **Price Movement:**\n", + " - The recent price is $288.37, with a gradual upward trend from $279.80 (5 days ago) to $288.37.\n", + "\n", + "2. **SMA and EMA Analysis:**\n", + " - The 52-week Simple Moving Average (SMA) on the latest date is $252.8374.\n", + " - The Exponential Moving Average (EMA) for the same period is $260.7207.\n", + " - Both SMA and EMA are below the current stock price, which can suggest a bullish trend as the price is above these moving averages.\n", + "\n", + "3. **RSI:**\n", + " - The Relative Strength Index (RSI) as of the latest date is 67.6313.\n", + " - This suggests that the stock is nearing overbought conditions (traditionally above 70 is considered overbought), but not quite there yet.\n", + "\n", + "**Recommendation:**\n", + "- The current upward trend, along with the price being above SMA and EMA, and the RSI approaching overbought levels, suggests a cautious optimism about further price increases.\n", + "- If you're holding IBM stock, you might consider continuing to hold, but be wary of the RSI indicating a potential for near-term overbought conditions. \n", + "- For new entries, it might be wise to wait for a potential dip before buying in, or ensure tight stop-losses are in place if buying now to protect against sudden reversals.\n", + "\n", + "Please assess these insights with additional fundamental research and individual financial goals before making investment decisions." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_handle = display(Markdown(\"\"), display_id=True)\n", + "stream_gpt('IBM')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89f8a8b7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "17a9be9c", + "metadata": {}, + "outputs": [], + "source": [ + "stock_data = {\"Meta Data\":{\"1. Information\":\"Daily Prices (open, high, low, close) and Volumes\",\"2. Symbol\":\"IBM\",\"3. Last Refreshed\":\"2025-10-03\",\"4. Output Size\":\"Compact\",\"5. Time Zone\":\"US/Eastern\"},\"Time Series (Daily)\":{\"2025-10-03\":{\"1. open\":\"287.5000\",\"2. high\":\"293.3200\",\"3. low\":\"287.3000\",\"4. close\":\"288.3700\",\"5. volume\":\"4375082\"},\"2025-10-02\":{\"1. open\":\"285.7900\",\"2. high\":\"288.5400\",\"3. low\":\"282.7900\",\"4. close\":\"286.7200\",\"5. volume\":\"3814232\"},\"2025-10-01\":{\"1. open\":\"280.2000\",\"2. high\":\"286.5900\",\"3. low\":\"280.1500\",\"4. close\":\"286.4900\",\"5. volume\":\"4381338\"},\"2025-09-30\":{\"1. open\":\"280.8800\",\"2. high\":\"286.0250\",\"3. low\":\"280.5200\",\"4. close\":\"282.1600\",\"5. volume\":\"5926924\"},\"2025-09-29\":{\"1. open\":\"286.0000\",\"2. high\":\"286.0000\",\"3. low\":\"279.6600\",\"4. close\":\"279.8000\",\"5. volume\":\"6022125\"},\"2025-09-26\":{\"1. open\":\"280.5100\",\"2. high\":\"288.8500\",\"3. low\":\"280.1100\",\"4. close\":\"284.3100\",\"5. volume\":\"9063938\"},\"2025-09-25\":{\"1. open\":\"272.9350\",\"2. high\":\"284.2300\",\"3. low\":\"271.1480\",\"4. close\":\"281.4400\",\"5. volume\":\"11506192\"},\"2025-09-24\":{\"1. open\":\"272.6200\",\"2. high\":\"273.6499\",\"3. low\":\"267.3000\",\"4. close\":\"267.5300\",\"5. volume\":\"3159924\"},\"2025-09-23\":{\"1. open\":\"272.7000\",\"2. high\":\"273.2962\",\"3. low\":\"269.2650\",\"4. close\":\"272.2400\",\"5. 
volume\":\"5394121\"},\"2025-09-22\":{\"1. open\":\"266.6200\",\"2. high\":\"272.3100\",\"3. low\":\"266.0000\",\"4. close\":\"271.3700\",\"5. volume\":\"5030540\"},\"2025-09-19\":{\"1. open\":\"266.0500\",\"2. high\":\"267.8700\",\"3. low\":\"263.6400\",\"4. close\":\"266.4000\",\"5. volume\":\"9858112\"},\"2025-09-18\":{\"1. open\":\"258.8600\",\"2. high\":\"265.2300\",\"3. low\":\"256.8004\",\"4. close\":\"265.0000\",\"5. volume\":\"4988421\"},\"2025-09-17\":{\"1. open\":\"257.4950\",\"2. high\":\"260.9644\",\"3. low\":\"257.0100\",\"4. close\":\"259.0800\",\"5. volume\":\"3974785\"},\"2025-09-16\":{\"1. open\":\"256.2600\",\"2. high\":\"258.0000\",\"3. low\":\"254.4100\",\"4. close\":\"257.5200\",\"5. volume\":\"2719918\"},\"2025-09-15\":{\"1. open\":\"254.0200\",\"2. high\":\"259.0500\",\"3. low\":\"254.0000\",\"4. close\":\"256.2400\",\"5. volume\":\"4028365\"},\"2025-09-12\":{\"1. open\":\"256.9500\",\"2. high\":\"257.2500\",\"3. low\":\"252.4250\",\"4. close\":\"253.4400\",\"5. volume\":\"3433300\"},\"2025-09-11\":{\"1. open\":\"257.5600\",\"2. high\":\"258.5450\",\"3. low\":\"255.6550\",\"4. close\":\"257.0100\",\"5. volume\":\"3576048\"},\"2025-09-10\":{\"1. open\":\"259.6500\",\"2. high\":\"260.0800\",\"3. low\":\"254.5600\",\"4. close\":\"256.8800\",\"5. volume\":\"5185420\"},\"2025-09-09\":{\"1. open\":\"256.1200\",\"2. high\":\"260.6600\",\"3. low\":\"254.8800\",\"4. close\":\"259.1100\",\"5. volume\":\"4931105\"},\"2025-09-08\":{\"1. open\":\"248.6300\",\"2. high\":\"257.1500\",\"3. low\":\"247.0200\",\"4. close\":\"256.0900\",\"5. volume\":\"6940270\"},\"2025-09-05\":{\"1. open\":\"248.2300\",\"2. high\":\"249.0300\",\"3. low\":\"245.4500\",\"4. close\":\"248.5300\",\"5. volume\":\"3147478\"},\"2025-09-04\":{\"1. open\":\"245.4200\",\"2. high\":\"249.2800\",\"3. low\":\"242.8500\",\"4. close\":\"247.1800\",\"5. volume\":\"4765087\"},\"2025-09-03\":{\"1. open\":\"240.0200\",\"2. high\":\"244.2500\",\"3. low\":\"239.4100\",\"4. 
close\":\"244.1000\",\"5. volume\":\"3156289\"},\"2025-09-02\":{\"1. open\":\"240.9000\",\"2. high\":\"241.5500\",\"3. low\":\"238.2500\",\"4. close\":\"241.5000\",\"5. volume\":\"3469501\"},\"2025-08-29\":{\"1. open\":\"245.2300\",\"2. high\":\"245.4599\",\"3. low\":\"241.7200\",\"4. close\":\"243.4900\",\"5. volume\":\"2967558\"},\"2025-08-28\":{\"1. open\":\"245.4300\",\"2. high\":\"245.8800\",\"3. low\":\"243.3600\",\"4. close\":\"245.7300\",\"5. volume\":\"2820817\"},\"2025-08-27\":{\"1. open\":\"242.8700\",\"2. high\":\"245.9600\",\"3. low\":\"242.0000\",\"4. close\":\"244.8400\",\"5. volume\":\"3698372\"},\"2025-08-26\":{\"1. open\":\"241.0200\",\"2. high\":\"244.9800\",\"3. low\":\"240.3800\",\"4. close\":\"242.6300\",\"5. volume\":\"5386582\"},\"2025-08-25\":{\"1. open\":\"242.5650\",\"2. high\":\"242.5650\",\"3. low\":\"239.4300\",\"4. close\":\"239.4300\",\"5. volume\":\"3513327\"},\"2025-08-22\":{\"1. open\":\"240.7400\",\"2. high\":\"243.6800\",\"3. low\":\"240.2200\",\"4. close\":\"242.0900\",\"5. volume\":\"3134882\"},\"2025-08-21\":{\"1. open\":\"242.2100\",\"2. high\":\"242.5000\",\"3. low\":\"238.6500\",\"4. close\":\"239.4000\",\"5. volume\":\"2991902\"},\"2025-08-20\":{\"1. open\":\"242.1100\",\"2. high\":\"242.8800\",\"3. low\":\"240.3400\",\"4. close\":\"242.5500\",\"5. volume\":\"3240064\"},\"2025-08-19\":{\"1. open\":\"240.0000\",\"2. high\":\"242.8300\",\"3. low\":\"239.4900\",\"4. close\":\"241.2800\",\"5. volume\":\"3328305\"},\"2025-08-18\":{\"1. open\":\"239.5700\",\"2. high\":\"241.4200\",\"3. low\":\"239.1158\",\"4. close\":\"239.4500\",\"5. volume\":\"3569594\"},\"2025-08-15\":{\"1. open\":\"237.6100\",\"2. high\":\"240.6200\",\"3. low\":\"236.7700\",\"4. close\":\"239.7200\",\"5. volume\":\"4344322\"},\"2025-08-14\":{\"1. open\":\"238.2500\",\"2. high\":\"239.0000\",\"3. low\":\"235.6200\",\"4. close\":\"237.1100\",\"5. volume\":\"4556725\"},\"2025-08-13\":{\"1. open\":\"236.2000\",\"2. high\":\"240.8411\",\"3. 
low\":\"236.2000\",\"4. close\":\"240.0700\",\"5. volume\":\"5663562\"},\"2025-08-12\":{\"1. open\":\"236.5300\",\"2. high\":\"237.9600\",\"3. low\":\"233.3600\",\"4. close\":\"234.7700\",\"5. volume\":\"8800597\"},\"2025-08-11\":{\"1. open\":\"242.2400\",\"2. high\":\"243.1500\",\"3. low\":\"234.7000\",\"4. close\":\"236.3000\",\"5. volume\":\"9381960\"},\"2025-08-08\":{\"1. open\":\"248.8800\",\"2. high\":\"249.4800\",\"3. low\":\"241.6500\",\"4. close\":\"242.2700\",\"5. volume\":\"6828390\"},\"2025-08-07\":{\"1. open\":\"252.8100\",\"2. high\":\"255.0000\",\"3. low\":\"248.8750\",\"4. close\":\"250.1600\",\"5. volume\":\"6251285\"},\"2025-08-06\":{\"1. open\":\"251.5300\",\"2. high\":\"254.3200\",\"3. low\":\"249.2800\",\"4. close\":\"252.2800\",\"5. volume\":\"3692105\"},\"2025-08-05\":{\"1. open\":\"252.0000\",\"2. high\":\"252.8000\",\"3. low\":\"248.9950\",\"4. close\":\"250.6700\",\"5. volume\":\"5823016\"},\"2025-08-04\":{\"1. open\":\"251.0500\",\"2. high\":\"252.0800\",\"3. low\":\"248.1100\",\"4. close\":\"251.9800\",\"5. volume\":\"5280588\"},\"2025-08-01\":{\"1. open\":\"251.4050\",\"2. high\":\"251.4791\",\"3. low\":\"245.6100\",\"4. close\":\"250.0500\",\"5. volume\":\"9683404\"},\"2025-07-31\":{\"1. open\":\"259.5700\",\"2. high\":\"259.9900\",\"3. low\":\"252.2200\",\"4. close\":\"253.1500\",\"5. volume\":\"6739092\"},\"2025-07-30\":{\"1. open\":\"261.6000\",\"2. high\":\"262.0000\",\"3. low\":\"258.9000\",\"4. close\":\"260.2600\",\"5. volume\":\"3718290\"},\"2025-07-29\":{\"1. open\":\"264.3000\",\"2. high\":\"265.7999\",\"3. low\":\"261.0200\",\"4. close\":\"262.4100\",\"5. volume\":\"4627265\"},\"2025-07-28\":{\"1. open\":\"260.3000\",\"2. high\":\"264.0000\",\"3. low\":\"259.6100\",\"4. close\":\"263.2100\",\"5. volume\":\"5192516\"},\"2025-07-25\":{\"1. open\":\"260.0200\",\"2. high\":\"260.8000\",\"3. low\":\"256.3500\",\"4. close\":\"259.7200\",\"5. volume\":\"7758653\"},\"2025-07-24\":{\"1. open\":\"261.2500\",\"2. 
high\":\"262.0486\",\"3. low\":\"252.7500\",\"4. close\":\"260.5100\",\"5. volume\":\"22647720\"},\"2025-07-23\":{\"1. open\":\"284.3000\",\"2. high\":\"288.0800\",\"3. low\":\"281.4400\",\"4. close\":\"282.0100\",\"5. volume\":\"8105906\"},\"2025-07-22\":{\"1. open\":\"284.7400\",\"2. high\":\"284.8800\",\"3. low\":\"281.2500\",\"4. close\":\"281.9600\",\"5. volume\":\"4824219\"},\"2025-07-21\":{\"1. open\":\"286.2900\",\"2. high\":\"287.7300\",\"3. low\":\"284.3800\",\"4. close\":\"284.7100\",\"5. volume\":\"3051791\"},\"2025-07-18\":{\"1. open\":\"283.3800\",\"2. high\":\"287.1600\",\"3. low\":\"282.2200\",\"4. close\":\"285.8700\",\"5. volume\":\"4478165\"},\"2025-07-17\":{\"1. open\":\"281.5000\",\"2. high\":\"283.4566\",\"3. low\":\"280.9000\",\"4. close\":\"282.0000\",\"5. volume\":\"3337168\"},\"2025-07-16\":{\"1. open\":\"282.7500\",\"2. high\":\"283.8700\",\"3. low\":\"279.8700\",\"4. close\":\"281.9200\",\"5. volume\":\"2804831\"},\"2025-07-15\":{\"1. open\":\"283.7700\",\"2. high\":\"284.1550\",\"3. low\":\"280.7301\",\"4. close\":\"282.7000\",\"5. volume\":\"2864106\"},\"2025-07-14\":{\"1. open\":\"282.8300\",\"2. high\":\"284.9250\",\"3. low\":\"281.7100\",\"4. close\":\"283.7900\",\"5. volume\":\"2857401\"},\"2025-07-11\":{\"1. open\":\"285.0100\",\"2. high\":\"287.4300\",\"3. low\":\"282.9200\",\"4. close\":\"283.5900\",\"5. volume\":\"3790679\"},\"2025-07-10\":{\"1. open\":\"288.9000\",\"2. high\":\"288.9000\",\"3. low\":\"282.2100\",\"4. close\":\"287.4300\",\"5. volume\":\"3489068\"},\"2025-07-09\":{\"1. open\":\"291.3900\",\"2. high\":\"291.6000\",\"3. low\":\"288.6300\",\"4. close\":\"290.1400\",\"5. volume\":\"2971309\"},\"2025-07-08\":{\"1. open\":\"293.1000\",\"2. high\":\"295.6100\",\"3. low\":\"289.4900\",\"4. close\":\"290.4200\",\"5. volume\":\"2925329\"},\"2025-07-07\":{\"1. open\":\"292.5000\",\"2. high\":\"295.2199\",\"3. low\":\"290.3607\",\"4. close\":\"292.4700\",\"5. volume\":\"4488064\"},\"2025-07-03\":{\"1. 
open\":\"287.9400\",\"2. high\":\"292.3200\",\"3. low\":\"287.9000\",\"4. close\":\"291.9700\",\"5. volume\":\"1853289\"},\"2025-07-02\":{\"1. open\":\"290.0000\",\"2. high\":\"290.1900\",\"3. low\":\"286.9000\",\"4. close\":\"287.6500\",\"5. volume\":\"3257515\"},\"2025-07-01\":{\"1. open\":\"294.5500\",\"2. high\":\"295.1081\",\"3. low\":\"290.0800\",\"4. close\":\"291.2000\",\"5. volume\":\"3272797\"},\"2025-06-30\":{\"1. open\":\"290.9300\",\"2. high\":\"294.8100\",\"3. low\":\"290.0000\",\"4. close\":\"294.7800\",\"5. volume\":\"3495386\"},\"2025-06-27\":{\"1. open\":\"292.9700\",\"2. high\":\"293.1200\",\"3. low\":\"288.5200\",\"4. close\":\"289.7000\",\"5. volume\":\"3562501\"},\"2025-06-26\":{\"1. open\":\"291.8000\",\"2. high\":\"292.9100\",\"3. low\":\"290.1650\",\"4. close\":\"291.9300\",\"5. volume\":\"3621110\"},\"2025-06-25\":{\"1. open\":\"294.4900\",\"2. high\":\"296.1600\",\"3. low\":\"289.5000\",\"4. close\":\"291.0600\",\"5. volume\":\"3862309\"},\"2025-06-24\":{\"1. open\":\"290.4600\",\"2. high\":\"294.3399\",\"3. low\":\"288.4100\",\"4. close\":\"293.7900\",\"5. volume\":\"4219120\"},\"2025-06-23\":{\"1. open\":\"281.6500\",\"2. high\":\"289.5800\",\"3. low\":\"280.2100\",\"4. close\":\"289.1800\",\"5. volume\":\"3786159\"},\"2025-06-20\":{\"1. open\":\"279.2800\",\"2. high\":\"284.1200\",\"3. low\":\"277.2000\",\"4. close\":\"280.9700\",\"5. volume\":\"7676962\"},\"2025-06-18\":{\"1. open\":\"285.0000\",\"2. high\":\"286.9100\",\"3. low\":\"282.9400\",\"4. close\":\"283.2100\",\"5. volume\":\"3534110\"},\"2025-06-17\":{\"1. open\":\"281.1500\",\"2. high\":\"284.7899\",\"3. low\":\"281.0001\",\"4. close\":\"283.0500\",\"5. volume\":\"3069556\"},\"2025-06-16\":{\"1. open\":\"279.3050\",\"2. high\":\"284.5000\",\"3. low\":\"278.6657\",\"4. close\":\"281.8300\",\"5. volume\":\"3685321\"},\"2025-06-13\":{\"1. open\":\"278.2050\",\"2. high\":\"279.8400\",\"3. low\":\"275.8300\",\"4. close\":\"277.2200\",\"5. 
volume\":\"3243824\"},\"2025-06-12\":{\"1. open\":\"281.5300\",\"2. high\":\"283.0600\",\"3. low\":\"279.8300\",\"4. close\":\"281.0300\",\"5. volume\":\"3418007\"},\"2025-06-11\":{\"1. open\":\"276.7000\",\"2. high\":\"281.7500\",\"3. low\":\"275.1100\",\"4. close\":\"281.5200\",\"5. volume\":\"4656034\"},\"2025-06-10\":{\"1. open\":\"273.1900\",\"2. high\":\"277.4700\",\"3. low\":\"272.5600\",\"4. close\":\"276.2400\",\"5. volume\":\"5163507\"},\"2025-06-09\":{\"1. open\":\"268.1000\",\"2. high\":\"273.4700\",\"3. low\":\"266.7100\",\"4. close\":\"272.0800\",\"5. volume\":\"4331464\"},\"2025-06-06\":{\"1. open\":\"267.9900\",\"2. high\":\"270.1700\",\"3. low\":\"267.5300\",\"4. close\":\"268.8700\",\"5. volume\":\"2495543\"},\"2025-06-05\":{\"1. open\":\"265.2000\",\"2. high\":\"267.5100\",\"3. low\":\"265.1000\",\"4. close\":\"266.8600\",\"5. volume\":\"2659478\"},\"2025-06-04\":{\"1. open\":\"264.9000\",\"2. high\":\"267.0000\",\"3. low\":\"264.7900\",\"4. close\":\"265.5200\",\"5. volume\":\"2588741\"},\"2025-06-03\":{\"1. open\":\"263.3500\",\"2. high\":\"265.5600\",\"3. low\":\"262.5800\",\"4. close\":\"265.2000\",\"5. volume\":\"2494922\"},\"2025-06-02\":{\"1. open\":\"257.8500\",\"2. high\":\"263.9760\",\"3. low\":\"257.2200\",\"4. close\":\"263.9000\",\"5. volume\":\"2831881\"},\"2025-05-30\":{\"1. open\":\"258.7500\",\"2. high\":\"260.1200\",\"3. low\":\"257.1000\",\"4. close\":\"259.0600\",\"5. volume\":\"9668923\"},\"2025-05-29\":{\"1. open\":\"260.7500\",\"2. high\":\"261.1300\",\"3. low\":\"256.7700\",\"4. close\":\"258.6900\",\"5. volume\":\"2295228\"},\"2025-05-28\":{\"1. open\":\"263.1600\",\"2. high\":\"265.0000\",\"3. low\":\"259.9400\",\"4. close\":\"260.2400\",\"5. volume\":\"2318437\"},\"2025-05-27\":{\"1. open\":\"261.0000\",\"2. high\":\"263.7869\",\"3. low\":\"259.6300\",\"4. close\":\"263.2300\",\"5. volume\":\"3284216\"},\"2025-05-23\":{\"1. open\":\"258.5800\",\"2. high\":\"259.8696\",\"3. low\":\"255.7900\",\"4. 
close\":\"258.6300\",\"5. volume\":\"2722721\"},\"2025-05-22\":{\"1. open\":\"260.7700\",\"2. high\":\"261.2711\",\"3. low\":\"257.9100\",\"4. close\":\"258.3700\",\"5. volume\":\"3091253\"},\"2025-05-21\":{\"1. open\":\"264.9700\",\"2. high\":\"265.6499\",\"3. low\":\"260.4100\",\"4. close\":\"260.8700\",\"5. volume\":\"3753904\"},\"2025-05-20\":{\"1. open\":\"267.4000\",\"2. high\":\"269.2800\",\"3. low\":\"265.6201\",\"4. close\":\"266.9500\",\"5. volume\":\"2437860\"},\"2025-05-19\":{\"1. open\":\"265.4500\",\"2. high\":\"269.1350\",\"3. low\":\"265.0800\",\"4. close\":\"268.4100\",\"5. volume\":\"3198903\"},\"2025-05-16\":{\"1. open\":\"266.3500\",\"2. high\":\"267.9800\",\"3. low\":\"264.5900\",\"4. close\":\"266.7600\",\"5. volume\":\"3817937\"},\"2025-05-15\":{\"1. open\":\"259.0100\",\"2. high\":\"267.4300\",\"3. low\":\"258.6100\",\"4. close\":\"266.6800\",\"5. volume\":\"4856276\"},\"2025-05-14\":{\"1. open\":\"257.6000\",\"2. high\":\"260.5500\",\"3. low\":\"256.2200\",\"4. close\":\"257.8200\",\"5. volume\":\"3635124\"},\"2025-05-13\":{\"1. open\":\"254.4300\",\"2. high\":\"259.5800\",\"3. low\":\"252.8800\",\"4. close\":\"258.5900\",\"5. 
volume\":\"3521389\"}}}\n", + "\n", + "sma_data = {\"Meta Data\":{\"1: Symbol\":\"IBM\",\"2: Indicator\":\"Simple Moving Average (SMA)\",\"3: Last Refreshed\":\"2025-10-03\",\"4: Interval\":\"weekly\",\"5: Time Period\":10,\"6: Series Type\":\"open\",\"7: Time Zone\":\"US/Eastern\"},\"Technical Analysis: SMA\":{\"2025-10-03\":{\"SMA\":\"252.8374\"},\"2025-09-26\":{\"SMA\":\"252.6692\"},\"2025-09-19\":{\"SMA\":\"254.0954\"},\"2025-09-12\":{\"SMA\":\"257.7420\"},\"2025-09-05\":{\"SMA\":\"261.7716\"},\"2025-08-29\":{\"SMA\":\"265.6527\"},\"2025-08-22\":{\"SMA\":\"269.1343\"},\"2025-08-15\":{\"SMA\":\"271.8027\"},\"2025-08-08\":{\"SMA\":\"273.1861\"},\"2025-08-01\":{\"SMA\":\"274.1743\"},\"2025-07-25\":{\"SMA\":\"274.6857\"},\"2025-07-18\":{\"SMA\":\"271.3300\"},\"2025-07-11\":{\"SMA\":\"267.2858\"},\"2025-07-03\":{\"SMA\":\"261.2080\"},\"2025-06-27\":{\"SMA\":\"255.7996\"},\"2025-06-20\":{\"SMA\":\"251.4810\"},\"2025-06-13\":{\"SMA\":\"245.3701\"},\"2025-06-06\":{\"SMA\":\"242.6901\"},\"2025-05-30\":{\"SMA\":\"241.4789\"},\"2025-05-23\":{\"SMA\":\"240.1462\"}}}\n", + "\n", + "\n", + "ema_data = {\"Meta Data\":{\"1: Symbol\":\"IBM\",\"2: Indicator\":\"Exponential Moving Average (EMA)\",\"3: Last Refreshed\":\"2025-10-03\",\"4: Interval\":\"weekly\",\"5: Time Period\":10,\"6: Series Type\":\"open\",\"7: Time Zone\":\"US/Eastern\"},\"Technical Analysis: 
EMA\":{\"2025-10-03\":{\"EMA\":\"260.7207\"},\"2025-09-26\":{\"EMA\":\"255.1031\"},\"2025-09-19\":{\"EMA\":\"252.5437\"},\"2025-09-12\":{\"EMA\":\"252.2157\"},\"2025-09-05\":{\"EMA\":\"253.0125\"},\"2025-08-29\":{\"EMA\":\"255.7041\"},\"2025-08-22\":{\"EMA\":\"258.6240\"},\"2025-08-15\":{\"EMA\":\"262.8582\"},\"2025-08-08\":{\"EMA\":\"267.4400\"},\"2025-08-01\":{\"EMA\":\"271.4664\"},\"2025-07-25\":{\"EMA\":\"274.3462\"},\"2025-07-18\":{\"EMA\":\"272.1301\"},\"2025-07-11\":{\"EMA\":\"270.1852\"},\"2025-07-03\":{\"EMA\":\"265.6740\"},\"2025-06-27\":{\"EMA\":\"260.5068\"},\"2025-06-20\":{\"EMA\":\"256.2393\"},\"2025-06-13\":{\"EMA\":\"251.5411\"},\"2025-06-06\":{\"EMA\":\"248.2716\"},\"2025-05-30\":{\"EMA\":\"246.5377\"},\"2025-05-23\":{\"EMA\":\"243.7233\"}}}\n", + "\n", + "\n", + "rsi_data = {\"Meta Data\":{\"1: Symbol\":\"IBM\",\"2: Indicator\":\"Relative Strength Index (RSI)\",\"3: Last Refreshed\":\"2025-10-03\",\"4: Interval\":\"weekly\",\"5: Time Period\":10,\"6: Series Type\":\"open\",\"7: Time Zone\":\"US/Eastern Time\"},\"Technical Analysis: RSI\":{\"2025-10-03\":{\"RSI\":\"67.6313\"},\"2025-09-26\":{\"RSI\":\"58.4411\"},\"2025-09-19\":{\"RSI\":\"50.1612\"},\"2025-09-12\":{\"RSI\":\"46.0208\"},\"2025-09-05\":{\"RSI\":\"39.5374\"},\"2025-08-29\":{\"RSI\":\"40.4799\"},\"2025-08-22\":{\"RSI\":\"38.0907\"},\"2025-08-15\":{\"RSI\":\"39.3583\"},\"2025-08-08\":{\"RSI\":\"42.7545\"},\"2025-08-01\":{\"RSI\":\"47.5445\"},\"2025-07-25\":{\"RSI\":\"66.3390\"},\"2025-07-18\":{\"RSI\":\"64.6654\"},\"2025-07-11\":{\"RSI\":\"73.9081\"},\"2025-07-03\":{\"RSI\":\"73.3515\"},\"2025-06-27\":{\"RSI\":\"69.9405\"},\"2025-06-20\":{\"RSI\":\"69.0393\"},\"2025-06-13\":{\"RSI\":\"64.4562\"},\"2025-06-06\":{\"RSI\":\"59.5233\"},\"2025-05-30\":{\"RSI\":\"61.8994\"},\"2025-05-23\":{\"RSI\":\"65.2089\"}}}\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From e63ef8c0f4dc572da2b32a497aac2f8a2a2ef8cb Mon Sep 17 00:00:00 2001 From: Mr-Tson Date: Wed, 8 Oct 2025 07:12:24 +0200 Subject: [PATCH 2/5] feat: Final submission for week 3 synthetic dataset generator (cleared cell outputs) --- ...Exercise_Synthetic_Dataset_Generator.ipynb | 5035 +++++++++++++++++ 1 file changed, 5035 insertions(+) create mode 100644 week3/community-contributions/Week3_Exercise_Synthetic_Dataset_Generator.ipynb diff --git a/week3/community-contributions/Week3_Exercise_Synthetic_Dataset_Generator.ipynb b/week3/community-contributions/Week3_Exercise_Synthetic_Dataset_Generator.ipynb new file mode 100644 index 0000000..1767bcb --- /dev/null +++ b/week3/community-contributions/Week3_Exercise_Synthetic_Dataset_Generator.ipynb @@ -0,0 +1,5035 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "OvBwRxvXhzpF" + }, + "source": [ + "# Synthetic Dataset Generator" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-2MSGQC8uwuA" + }, + "source": [ + "## 0. 
Setup and sign in to Hugging Face" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 37639, + "status": "ok", + "timestamp": 1759633893217, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "ciBOYWb4uq9h", + "outputId": "5b4a31a9-ea21-4e91-ebee-01a0e3c77030" + }, + "outputs": [], + "source": [ + "!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yDnr7d2607E5" + }, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "import io\n", + "import tempfile\n", + "import torch\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer, TextIteratorStreamer\n", + "from google.colab import userdata\n", + "from huggingface_hub import login\n", + "from IPython.display import display, Markdown, update_display\n", + "from threading import Thread\n", + "from dotenv import load_dotenv\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tZz5QoVJ1GTL" + }, + "outputs": [], + "source": [ + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "96RmuUrnut9G" + }, + "source": [ + "## 1. 
Code Prototyping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5T_fMfFguzDM" + }, + "outputs": [], + "source": [ + "# Define the model name\n", + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "voGGr8V21P6-" + }, + "outputs": [], + "source": [ + "def load_model(model_name):\n", + " quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_compute_dtype=torch.bfloat16\n", + " )\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map=\"auto\")\n", + " return tokenizer, model\n", + "\n", + "def generate_stream_with_thread(messages, tokenizer, model):\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\", return_attention_mask=True).to(\"cuda\")\n", + " input_token_len = inputs[0].shape[-1] # Get the length of the input tokens\n", + " streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", + "\n", + " # Generate in separate thread\n", + " thread = Thread(target=model.generate, kwargs={\"inputs\": inputs, \"max_new_tokens\": 500, \"streamer\": streamer})\n", + " thread.start()\n", + "\n", + " # Stream and optionally filter output\n", + " unwanted_patterns = [\"assistant\", \"<|\", '|>']\n", + " response = \"\"\n", + " for text in streamer:\n", + " if text.strip() in unwanted_patterns:\n", + " continue\n", + " else:\n", + " print(text, end=\"\")\n", + " thread.join()\n", + "\n", + "def generate_stream_with_thread_gradio(messages, tokenizer, model):\n", + " \"\"\"Same as generate_stream_with_thread but yield accumulated reply\"\"\"\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\", 
return_attention_mask=True).to(\"cuda\")\n", + " input_token_len = inputs[0].shape[-1] # Get the length of the input tokens\n", + " streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", + "\n", + " # Generate in separate thread\n", + " thread = Thread(target=model.generate, kwargs={\"inputs\": inputs, \"max_new_tokens\": 500, \"streamer\": streamer})\n", + " thread.start()\n", + "\n", + " # Stream and optionally filter output\n", + " unwanted_patterns = [\"assistant\", \"<|\", '|>']\n", + " response = \"\"\n", + " for text in streamer:\n", + " if text.strip() in unwanted_patterns:\n", + " continue\n", + " else:\n", + " response += text\n", + " yield response\n", + " thread.join()\n", + "\n", + "def generate_answer(messages, tokenizer, model):\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\", return_attention_mask=True).to(\"cuda\")\n", + " input_token_len = inputs[0].shape[-1] # Get the length of the input tokens\n", + " outputs = model.generate(inputs, max_new_tokens=500)\n", + " decoded_output = tokenizer.decode(outputs[0][input_token_len:], skip_special_tokens=True, clean_up_tokenization_spaces=True)\n", + " return decoded_output.replace(\"assistant\\n\\n\", \"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 401, + "referenced_widgets": [ + "744ef0af763041b69cc272bd549280a2", + "fea2a012f93d4830992a69d7f970f738", + "75ebc5599c744a0a86ab66fcb7bc5639", + "9080517ad5b5457e83425628cc1f7391", + "bbb47fb60e554cabbdcfd524e6558818", + "ea47a5153ad445f3a28820d6c77fefff", + "77914334d6114da2af45add566b6f5c5", + "af70eeabfa784e1f8b51be4feb13fad2", + "0640dbea14a745fca2f6fa5f962ac775", + "c8c2f465ffe14afc99a2ab529189208a", + "bcf0d83c6828416681d05d97d342ba7f", + "ada15f657b5549f3a0ebd42c87edae53", + "e2bc87a955534be495f05e9bc1b45029", + "cda6d00d5daf4f06a58f36655b431293", + "b663362b43ed4e47b5daad7414b2db06", 
+ "03020848cfc34cdea1c65de137c624a7", + "1be17ce8b78141c687c21ce194bcc763", + "deacaa0a0a314886bce184524764215d", + "7494d5c09ddb4259873b6e463adedbaa", + "b61e3d318e4b44b0890ac56e787892c5", + "936883d594dd4e1c910579b998de29bd", + "ec2f50127c5f461b892cd1cce3823dec", + "ad2d2da8f0c942508463f22179819ad9", + "93604978de264369af6cad9606e3af9e", + "e82012bce3b04a60926f6bccf50b0c5e", + "0729eec1581f4b4197f59906685ab73a", + "2dd4d35ebc8c4b9dafedda3b546fc11d", + "48e600d9711540568e4de524e2993f4d", + "b336bc5c759e443184406160a1ded0ce", + "4dcf7eb450a44f3aa98730b43030b0ac", + "346e0fb21dac44c9ac77bd495244183c", + "dd0a074bd2d6426e9aea514e0a96a078", + "1f56cf9a7d5b4ba4889f302c909b889f", + "9f5f8656edca4ab693c662d7f2bf75ec", + "08a797751c55468aac4bdfbfee0d4d37", + "4e3220619a2b4c358a9772de3a399bec", + "180ada3f5686478caff55bea4844ad82", + "606541fab5144826b8ec524b52793023", + "d9dc0681f3ec4f0cbb82562967f126d5", + "ae30150e12df4320b9048cb03354ef5f", + "362043576d8444ccbe7dfb2796fb2511", + "8144ecf2be9642308f86c57418490d18", + "7047676eed1142a5bf43d6bdced7e8c2", + "252d318144ca4a2386ae149cd4995107", + "0c32754d08584330abdf057ef5fde573", + "81324aba1d6a4a919aeddf13ace64b4a", + "a17391b90bc64d49b80922037401fb64", + "4da1b4471c194bda9ea105fb25769eac", + "5cd3b1a4fbcc4f0a910c5f1e1e250bf0", + "624bb914918f4f1b972d0310030cc29a", + "5797f033785c486ea415d76938605ed2", + "36b3dd197efe44eb8d111b1ab1e31585", + "a696bd2c5a8e464494112c298946b3e1", + "02bd1f7a767140ad9b9aa00b0f310b2b", + "d397a857b7174694bf35f6a0bf258509", + "a45de69949ff4a81a6fd4604a2b4678e", + "ab907069ad414d02952a30a0a014cb73", + "2bd50972a522409398a22ae76b74eb15", + "7c1a57d1a6c243a09ce43e68669ad757", + "c9f2a0612c144ab5a941084b762ce3e2", + "a5cf1251bf4c4023b6dc3e1878740085", + "46e335b5ef4e4dc19d4812b214392e16", + "1731cb921abc41acb30cf18028cf95f9", + "08ddb7a3a5094d5ea3de669f0e03d462", + "56c770bcdb944fbe86a159b9234e778e", + "a6d76986d203472190863f12fcf83e7b", + "f785f0b858d84f2385bd94e8d67cddf0", + 
"f173fc0b2cd740be83d262e1bf9dbe08", + "6a0c785835d64ccfbe5c97f1ed647893", + "e3a77622925b4ce4986af76f3fb292ed", + "c1e39012677246a09e42d20733c9e174", + "fc00ecc3f0b04687ab7ae379ff2d2bdc", + "265f3a5a0b2d4cabab4f78bdedd6b957", + "15f33f0f77c641aba861879ddb66f655", + "6037d1b48d6144b7a23c2422d07f2184", + "71d9920f7c004a36908c35653c072690", + "233da03e51fc4abd835b4f740e49ac92", + "559305d2cbbb4df3a8cf9f5fd3c2a43d", + "a772ad500ba440169299ebfa61f19cd2", + "a687ec74cac4401f81ea29ee51db6cf4", + "0885d35288444f80a4472c4dc759cb1b", + "184dbc08613944b2bc40e90fe33d99b9", + "6ce16005dd4941f296cfc225e65e6517", + "45458cf2b8694deba8736af6fd57e8e6", + "81838418eb6743e2b6d4bb274514c157", + "5f6804815ea94c229004b75300d6434e", + "2b83b912332742daa28ab4a657380a40", + "c38dd8628d2e4869a5ce5338c87a638f", + "5ba7e060c17f4c509c4e258e3a5e3211", + "31cf47d5aec34a2b97d9efbba84da4e5", + "fd2a719ccbb44d769fa40282835f8286", + "75f4741409a24842895649af0218d239", + "87b474e2d1a9424fa44498e1a09e4600", + "cf00d92f58174d279f65b2fa9e6fc29f", + "2d45fecd748e43b899dd99f00b436ed6", + "8cefe6b259324768837e84e704862876", + "733b3148561e4b67aff5cfe82c168bb2", + "44d4522449874c7cab58e83fd7627903", + "8394d3bb4d60475f9e64f78c3dc4c56a", + "cec143ff825146659d71076cc4f238a5", + "b529628f69c5467294a9bd230210f1dd", + "a1d1a045be664a1784191d2f4dfb12e5", + "1b2837fb0147452a937b35ab8c585ade", + "4aa823c0e15e40b8b328f6f9de3f7771", + "06118d97f4ec45559a7851005f1cb4e1", + "e986b132445440429a6d77bebf031eea", + "18106bb7478c492e982b2485055d12a5", + "e9445fdbd5f640e2b7dacb72d3428057", + "cf3b4bfb03324c5a981041f866aed437", + "9cb0c3e048bc467fa53e4dc1383337fb", + "6aefe246643c479280124257988487d0", + "f35ea75c598e41eb9953f3f908577faa", + "34e150d0acb748da98fb50f5372c57bc", + "60770906b5b14eefa6e2c35a4993f944", + "91f21566f5d34ca785a18b3b372a510c", + "116c89c60a014cadbb281ba93a021cb7", + "e5939b22aecd47168c7e7da7daec611e", + "c66628bfed614e039314a2ab42340d6f", + "e0425d229f5f462e9367c09524e6545f", + 
"0700a6f52fcd48d99f2c995b9474c963", + "873a8cdf09734e7882ee1503e2aeb7c1", + "ed0cc53b0df34c0e8bf230bc68ae4410", + "ece91278c6df45d39e347a02842173fa", + "10eb570b260b44e6a7d42b6332b897bf", + "6027176c5c824977be67e7a135c0184b", + "49a00770f8fe4ac195311535ee4901af", + "c3e6c70c163a45439ba12ef559a0e6c6", + "cbc8acacf1024ebfb8db8fec7d3601ce", + "6a1718c4158a4df1a4cafba544727cca", + "d0b16c25ae5546a5ba491ff761206ae5", + "9f66fffe75b54a5c8a0d0cd0647460d4", + "851db003a3f44e609e2a50555ed15a6b" + ] + }, + "executionInfo": { + "elapsed": 602984, + "status": "ok", + "timestamp": 1759634516171, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "8KT2cbPEBDdZ", + "outputId": "bc2ca7a3-6191-4fef-83fb-b34926dec936" + }, + "outputs": [], + "source": [ + "# test basic functions\n", + "tokenizer, model = load_model(LLAMA)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 5686, + "status": "ok", + "timestamp": 1759634552750, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "aa1WGIn8BYzy", + "outputId": "207a2342-ce36-40e1-e2bc-d57dc599f323" + }, + "outputs": [], + "source": [ + "system_message = [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}]\n", + "user_prompt = \"Tell me 4 line poem.\"\n", + "messages = system_message + [{\"role\": \"user\", \"content\": user_prompt}]\n", + "generate_stream_with_thread(messages, tokenizer, model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 8333, + "status": "ok", + "timestamp": 1759465246704, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "uT_h9S131QBF", + "outputId": 
"16203a12-f311-496c-9962-d1d72f5cf290" + }, + "outputs": [], + "source": [ + "# prototype system- and user-prompt for synthetic dataset generator\n", + "dataset_types = [\"Instruction-Response Pairs\", \"Multi-Turn Chat Dialogue\", \"Text Classification\"]\n", + "response_csv_columns_headers = {\n", + " \"Instruction-Response Pairs\": [\"instruction\", \"response\", \"domain\", \"complexity\"],\n", + " \"Multi-Turn Chat Dialogue\": [\"conversation_id\", \"turn_number\", \"role\", \"content\"],\n", + " \"Text Classification\": [\"text\", \"label\", \"sourcestyle\"],\n", + "}\n", + "\n", + "dataset_type = \"Text Classification\"\n", + "system_message = f\"You are a dataset generator for {dataset_type}. Respond in csv format only, include the header, nothing extra and use the following columns {response_csv_columns_headers[dataset_type]} .\"\n", + "\n", + "target_domain = \"Cooking\"\n", + "instruction_type = \"Summarization\"\n", + "diversity_prompt = \"Increase in complexity\"\n", + "number_of_samples = 5\n", + "user_message_instruction_response_pairs = f\"For the target domain {target_domain} using the instruction type {instruction_type} create a {dataset_type} dataset. Make the question {diversity_prompt}. Create {number_of_samples} samples.\"\n", + "\n", + "scenario_role=\"customer support\"\n", + "number_of_turns=3\n", + "conversation_goal=\"Explain a complex concept\"\n", + "user_message_multi_turn_chat_dialogue = f\"For the scenario role {scenario_role} with {number_of_turns} number of turns and the conversation goal {conversation_goal} create a {dataset_type} dataset. Create {number_of_samples} samples.\"\n", + "\n", + "text_type = \"News Headlines\"\n", + "list_of_labels = [\"Sports\", \"Politics\", \"Tech\"]\n", + "user_message_text_classification = f\"For the text-type {text_type} and the labels {list_of_labels} create a {dataset_type} dataset. 
Create {number_of_samples} samples.\"\n", + "\n", + "messages = [{\"role\": \"system\", \"content\": system_message}]\n", + "messages.append({\"role\": \"user\", \"content\": user_message_text_classification})\n", + "response = generate_stream_with_thread(messages, tokenizer, model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 51, + "status": "ok", + "timestamp": 1759347628652, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "koJHoRuVDLFN", + "outputId": "08e302a8-7a57-448c-8c02-9d03218b7343" + }, + "outputs": [], + "source": [ + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iObFAoLBwL8F" + }, + "source": [ + "## 2. Gradio App" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0GwRK-ecDPj4" + }, + "outputs": [], + "source": [ + "dataset_types = [\"Instruction-Response Pairs\", \"Multi-Turn Chat Dialogue\", \"Text Classification\"]\n", + "response_csv_columns_headers = {\n", + " \"Instruction-Response Pairs\": [\"instruction\", \"response\", \"domain\", \"complexity\"],\n", + " \"Multi-Turn Chat Dialogue\": [\"conversation_id\", \"turn_number\", \"role\", \"content\"],\n", + " \"Text Classification\": [\"text\", \"label\", \"sourcestyle\"],\n", + "}\n", + "\n", + "def update_params(dataset_type):\n", + " # Default to hidden for all containers\n", + " hide_all = gr.update(visible=False)\n", + "\n", + " ir_update = hide_all\n", + " mtd_update = hide_all\n", + " tc_update = hide_all\n", + "\n", + " # Set the relevant container to visible based on selection\n", + " if dataset_type == \"Instruction-Response Pairs\":\n", + " ir_update = gr.update(visible=True)\n", + " elif dataset_type == \"Multi-Turn Chat Dialogue\":\n", + " mtd_update = gr.update(visible=True)\n", + " elif dataset_type == \"Text 
Classification\":\n", + " tc_update = gr.update(visible=True)\n", + "\n", + " # Now, we only return 3 updates!\n", + " return ir_update, mtd_update, tc_update\n", + "\n", + "def combine_messages(system_message, user_message):\n", + " messages = [{\"role\": \"system\", \"content\": system_message}]\n", + " messages.append({\"role\": \"user\", \"content\": user_message})\n", + " return messages\n", + "\n", + "def create_download_link(full_response):\n", + " # Create a temporary file\n", + " try:\n", + " temp_file = tempfile.NamedTemporaryFile(\n", + " mode='w',\n", + " delete=False,\n", + " suffix='.csv',\n", + " encoding='utf-8'\n", + " )\n", + "\n", + " temp_file.write(full_response)\n", + " temp_file.close()\n", + " return full_response, gr.update(visible=True, value=temp_file.name)\n", + " except Exception as e:\n", + " error_message = f\"Error writing to file: {e}\"\n", + " return full_response + \"\\n\\n\" + error_message, None\n", + "\n", + "def create_download_link_in_memory(full_response):\n", + " csv_buffer = io.StringIO(full_response)\n", + " csv_buffer.name = \"generated_dataset.csv\"\n", + "\n", + " try:\n", + " return full_response, gr.update(visible=True, value=csv_buffer)\n", + " except Exception as e:\n", + " error_message = f\"Error creating in-memory file: {e}\"\n", + " return full_response + \"\\n\\n\" + error_message, gr.update(value=None, visible=False)\n", + "\n", + "\n", + "def create_dataset_ir(dataset_type, target_domain, instruction_type, diversity_prompt, number_of_samples):\n", + " global response_csv_columns_headers\n", + " system_message = f\"You are a dataset generator for Instruction-Response Pairs. Respond in csv format only, include the header, nothing extra and use the following columns {response_csv_columns_headers[dataset_type]} .\"\n", + " user_message = f\"For the target domain {target_domain} using the instruction type {instruction_type} create a {dataset_type} dataset. Make the question {diversity_prompt}. 
Create {number_of_samples} samples.\"\n", + " messages = combine_messages(system_message, user_message)\n", + " stream_generator = generate_stream_with_thread_gradio(messages, tokenizer, model)\n", + " full_response = \"\"\n", + " for update in stream_generator:\n", + " full_response = update\n", + " yield update, gr.update(visible=False) # Keep download link hidden during stream\n", + " # After streaming is complete, create the download file\n", + " try:\n", + " temp_file = tempfile.NamedTemporaryFile(\n", + " mode='w',\n", + " delete=False,\n", + " suffix='.csv',\n", + " encoding='utf-8'\n", + " )\n", + " temp_file.write(full_response)\n", + " temp_file.close()\n", + "\n", + " # Final yield with the complete text and the visible download link\n", + " yield full_response, gr.update(visible=True, value=temp_file.name)\n", + " except Exception as e:\n", + " error_message = f\"Error writing to file: {e}\"\n", + " yield full_response + \"\\n\\n\" + error_message, gr.update(visible=False)\n", + "\n", + "\n", + "def create_dataset_mtd(dataset_type, scenario_role, number_of_turns, conversation_goal, number_of_samples):\n", + " global response_csv_columns_headers\n", + " system_message = f\"You are a dataset generator for Multi-Turn Chat Dialogue. Respond in csv format only, include the header, nothing extra and use the following columns {response_csv_columns_headers[dataset_type]} .\"\n", + " user_message = f\"For the scenario role {scenario_role} with {number_of_turns} number of turns and the conversation goal {conversation_goal} create a {dataset_type} dataset. 
Create {number_of_samples} samples.\"\n", + " messages = combine_messages(system_message, user_message)\n", + " stream_generator = generate_stream_with_thread_gradio(messages, tokenizer, model)\n", + " full_response = \"\"\n", + " for update in stream_generator:\n", + " full_response = update\n", + " yield update, gr.update(visible=False) # Keep download link hidden during stream\n", + " try:\n", + " temp_file = tempfile.NamedTemporaryFile(\n", + " mode='w',\n", + " delete=False,\n", + " suffix='.csv',\n", + " encoding='utf-8'\n", + " )\n", + " temp_file.write(full_response)\n", + " temp_file.close()\n", + "\n", + " # Final yield with the complete text and the visible download link\n", + " yield full_response, gr.update(visible=True, value=temp_file.name)\n", + " except Exception as e:\n", + " error_message = f\"Error writing to file: {e}\"\n", + " yield full_response + \"\\n\\n\" + error_message, gr.update(visible=False)\n", + "\n", + "def create_dataset_tc(dataset_type, text_type, list_of_labels, number_of_samples):\n", + " global response_csv_columns_headers\n", + " system_message = f\"You are a dataset generator for Text Classification. Respond in csv format only, include the header, nothing extra and use the following columns {response_csv_columns_headers[dataset_type]} .\"\n", + " user_message = f\"For the text-type {text_type} and the labels {list_of_labels} create a {dataset_type} dataset. 
Create {number_of_samples} samples.\"\n", + " messages = combine_messages(system_message, user_message)\n", + " stream_generator = generate_stream_with_thread_gradio(messages, tokenizer, model)\n", + " full_response = \"\"\n", + " for update in stream_generator:\n", + " full_response = update\n", + " yield update, gr.update(visible=False) # Keep download link hidden during stream\n", + " try:\n", + " temp_file = tempfile.NamedTemporaryFile(\n", + " mode='w',\n", + " delete=False,\n", + " suffix='.csv',\n", + " encoding='utf-8'\n", + " )\n", + " temp_file.write(full_response)\n", + " temp_file.close()\n", + "\n", + " # Final yield with the complete text and the visible download link\n", + " yield full_response, gr.update(visible=True, value=temp_file.name)\n", + " except Exception as e:\n", + " error_message = f\"Error writing to file: {e}\"\n", + " yield full_response + \"\\n\\n\" + error_message, gr.update(visible=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 680 + }, + "executionInfo": { + "elapsed": 82700, + "status": "ok", + "timestamp": 1759468954578, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "lqrDsgEiwL_n", + "outputId": "81e41895-7c64-44ad-b05f-9b4b92030389" + }, + "outputs": [], + "source": [ + "import gradio as gr\n", + "\n", + "with gr.Blocks() as demo:\n", + " # 1. 
Main Selection\n", + " with gr.Row():\n", + " with gr.Column():\n", + " dataset_type = gr.Dropdown(\n", + " choices=dataset_types,\n", + " label=\"Dataset Type\",\n", + " value=None,\n", + " )\n", + "\n", + " with gr.Column(visible=False) as ir_container:\n", + " with gr.Row():\n", + " target_domain = gr.Textbox(label=\"Target Domain\")\n", + " instruction_type = gr.Textbox(label=\"Instruction Type\")\n", + " diversity_prompt = gr.Textbox(label=\"Diversity Prompt\")\n", + " number_of_samples_ir = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_ir = gr.Button(\"Submit\")\n", + "\n", + " ir_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " ir_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " with gr.Column(visible=False) as mtd_container:\n", + " with gr.Row():\n", + " scenario_role = gr.Textbox(label=\"Scenario Role\")\n", + " number_of_turns = gr.Textbox(label=\"Number of Turns\")\n", + " conversation_goal = gr.Textbox(label=\"Conversation Goal\")\n", + " number_of_samples_mtd = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_mtd = gr.Button(\"Submit\")\n", + "\n", + " mtd_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " mtd_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " with gr.Column(visible=False) as tc_container:\n", + " with gr.Row():\n", + " text_type = gr.Textbox(label=\"Text Type\")\n", + " list_of_labels = gr.Textbox(label=\"List of Labels\")\n", + " number_of_samples_tc = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_tc = gr.Button(\"Submit\")\n", + "\n", + " tc_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " tc_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " # Attach the listener\n", + " dataset_type.change(update_params, inputs=[dataset_type], outputs=[ir_container, 
mtd_container, tc_container])\n", + " submit_ir.click(create_dataset_ir, inputs=[dataset_type, target_domain, instruction_type, diversity_prompt, number_of_samples_ir], outputs=[ir_output, ir_download_link])\n", + " submit_mtd.click(create_dataset_mtd, inputs=[dataset_type, scenario_role, number_of_turns, conversation_goal, number_of_samples_mtd], outputs=[mtd_output, mtd_download_link])\n", + " submit_tc.click(create_dataset_tc, inputs=[dataset_type, text_type, list_of_labels, number_of_samples_tc], outputs=[tc_output, tc_download_link])\n", + "\n", + "\n", + "demo.launch(debug=True, share=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QnbU-q7d2oGB" + }, + "source": [ + "## Refactored Code and named temp-file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 680 + }, + "executionInfo": { + "elapsed": 75622, + "status": "ok", + "timestamp": 1759636078533, + "user": { + "displayName": "thinsri@gmx.de", + "userId": "18422164867366802681" + }, + "user_tz": -120 + }, + "id": "9sSdP7hy2oLt", + "outputId": "d99aea8e-feeb-4c9d-d2ee-4d090b295128" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import os\n", + "import gradio as gr\n", + "\n", + "dataset_types = [\"Instruction-Response Pairs\", \"Multi-Turn Chat Dialogue\", \"Text Classification\"]\n", + "\n", + "response_csv_columns_headers = {\n", + " \"Instruction-Response Pairs\": [\"instruction\", \"response\", \"domain\", \"complexity\"],\n", + " \"Multi-Turn Chat Dialogue\": [\"conversation_id\", \"turn_number\", \"role\", \"content\"],\n", + " \"Text Classification\": [\"text\", \"label\", \"sourcestyle\"],\n", + "}\n", + "\n", + "# System message templates for each dataset type\n", + "system_message_templates = {\n", + " \"Instruction-Response Pairs\": \"You are a dataset generator for Instruction-Response Pairs. 
Respond in csv format only, include the header, nothing extra and use the following columns {columns}.\",\n", + " \"Multi-Turn Chat Dialogue\": \"You are a dataset generator for Multi-Turn Chat Dialogue. Respond in csv format only, include the header, nothing extra and use the following columns {columns}.\",\n", + " \"Text Classification\": \"You are a dataset generator for Text Classification. Respond in csv format only, include the header, nothing extra and use the following columns {columns}.\",\n", + "}\n", + "\n", + "# User message templates for each dataset type\n", + "user_message_templates = {\n", + " \"Instruction-Response Pairs\": \"For the target domain {target_domain} using the instruction type {instruction_type} create a {dataset_type} dataset. Make the question {diversity_prompt}. Create {number_of_samples} samples.\",\n", + " \"Multi-Turn Chat Dialogue\": \"For the scenario role {scenario_role} with {number_of_turns} number of turns and the conversation goal {conversation_goal} create a {dataset_type} dataset. Create {number_of_samples} samples.\",\n", + " \"Text Classification\": \"For the text-type {text_type} and the labels {list_of_labels} create a {dataset_type} dataset. 
Create {number_of_samples} samples.\",\n", + "}\n", + "\n", + "def update_params(dataset_type):\n", + " \"\"\"Update visibility of parameter containers based on selected dataset type\"\"\"\n", + " hide_all = gr.update(visible=False)\n", + "\n", + " visibility_map = {\n", + " \"Instruction-Response Pairs\": (gr.update(visible=True), hide_all, hide_all),\n", + " \"Multi-Turn Chat Dialogue\": (hide_all, gr.update(visible=True), hide_all),\n", + " \"Text Classification\": (hide_all, hide_all, gr.update(visible=True)),\n", + " }\n", + "\n", + " return visibility_map.get(dataset_type, (hide_all, hide_all, hide_all))\n", + "\n", + "def combine_messages(system_message, user_message):\n", + " \"\"\"Combine system and user messages into a message list\"\"\"\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + "\n", + "def create_dataset(dataset_type, **kwargs):\n", + " \"\"\"\n", + " Generic dataset creation function that handles all dataset types\n", + "\n", + " Args:\n", + " dataset_type: Type of dataset to generate\n", + " **kwargs: Dynamic parameters based on dataset type\n", + " \"\"\"\n", + " # Get the appropriate columns for this dataset type\n", + " columns = response_csv_columns_headers[dataset_type]\n", + "\n", + " # Create system message\n", + " system_message = system_message_templates[dataset_type].format(columns=columns)\n", + "\n", + " # Create user message with appropriate template and parameters\n", + " user_message = user_message_templates[dataset_type].format(\n", + " dataset_type=dataset_type,\n", + " **kwargs\n", + " )\n", + "\n", + " # Combine messages\n", + " messages = combine_messages(system_message, user_message)\n", + "\n", + " # Generate stream\n", + " stream_generator = generate_stream_with_thread_gradio(messages, tokenizer, model)\n", + " full_response = \"\"\n", + "\n", + " # Stream the text output\n", + " for update in stream_generator:\n", + " 
full_response = update\n", + " yield update, gr.update(visible=False) # Keep download hidden during stream\n", + "\n", + " # After streaming is complete, create the download file\n", + " try:\n", + " temp_filename = f\"generated_dataset_{dataset_type}.csv\"\n", + " temp_dir = tempfile.gettempdir()\n", + " temp_filepath = os.path.join(temp_dir, temp_filename)\n", + "\n", + " with open(temp_filepath, 'w', encoding='utf-8') as temp_file:\n", + " temp_file.write(full_response)\n", + "\n", + " # Final yield with the complete text and the visible download link\n", + " yield full_response, gr.update(visible=True, value=temp_file.name)\n", + " except Exception as e:\n", + " error_message = f\"Error writing to file: {e}\"\n", + " yield full_response + \"\\n\\n\" + error_message, gr.update(visible=False)\n", + "\n", + "# Wrapper functions for each dataset type (to handle different parameter names)\n", + "def create_dataset_ir(dataset_type, target_domain, instruction_type, diversity_prompt, number_of_samples):\n", + " \"\"\"Create Instruction-Response Pairs dataset\"\"\"\n", + " yield from create_dataset(\n", + " dataset_type,\n", + " target_domain=target_domain,\n", + " instruction_type=instruction_type,\n", + " diversity_prompt=diversity_prompt,\n", + " number_of_samples=number_of_samples\n", + " )\n", + "\n", + "def create_dataset_mtd(dataset_type, scenario_role, number_of_turns, conversation_goal, number_of_samples):\n", + " \"\"\"Create Multi-Turn Chat Dialogue dataset\"\"\"\n", + " yield from create_dataset(\n", + " dataset_type,\n", + " scenario_role=scenario_role,\n", + " number_of_turns=number_of_turns,\n", + " conversation_goal=conversation_goal,\n", + " number_of_samples=number_of_samples\n", + " )\n", + "\n", + "def create_dataset_tc(dataset_type, text_type, list_of_labels, number_of_samples):\n", + " \"\"\"Create Text Classification dataset\"\"\"\n", + " yield from create_dataset(\n", + " dataset_type,\n", + " text_type=text_type,\n", + " 
list_of_labels=list_of_labels,\n", + " number_of_samples=number_of_samples\n", + " )\n", + "\n", + "# Gradio UI\n", + "with gr.Blocks() as demo:\n", + " gr.Markdown(\"# Synthetic Dataset Generator\")\n", + "\n", + " # Main Selection\n", + " with gr.Row():\n", + " with gr.Column():\n", + " dataset_type = gr.Dropdown(\n", + " choices=dataset_types,\n", + " label=\"Dataset Type\",\n", + " value=None,\n", + " )\n", + "\n", + " # Instruction-Response Pairs Container\n", + " with gr.Column(visible=False) as ir_container:\n", + " gr.Markdown(\"### Instruction-Response Pairs Parameters\")\n", + " with gr.Row():\n", + " target_domain = gr.Textbox(label=\"Target Domain\")\n", + " instruction_type = gr.Textbox(label=\"Instruction Type\")\n", + " diversity_prompt = gr.Textbox(label=\"Diversity Prompt\")\n", + " number_of_samples_ir = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_ir = gr.Button(\"Generate Dataset\")\n", + "\n", + " ir_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " ir_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " # Multi-Turn Chat Dialogue Container\n", + " with gr.Column(visible=False) as mtd_container:\n", + " gr.Markdown(\"### Multi-Turn Chat Dialogue Parameters\")\n", + " with gr.Row():\n", + " scenario_role = gr.Textbox(label=\"Scenario Role\")\n", + " number_of_turns = gr.Textbox(label=\"Number of Turns\")\n", + " conversation_goal = gr.Textbox(label=\"Conversation Goal\")\n", + " number_of_samples_mtd = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_mtd = gr.Button(\"Generate Dataset\")\n", + "\n", + " mtd_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " mtd_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " # Text Classification Container\n", + " with gr.Column(visible=False) as tc_container:\n", + " gr.Markdown(\"### Text Classification Parameters\")\n", + " with 
gr.Row():\n", + " text_type = gr.Textbox(label=\"Text Type\")\n", + " list_of_labels = gr.Textbox(label=\"List of Labels\")\n", + " number_of_samples_tc = gr.Textbox(label=\"Number of Samples\")\n", + " with gr.Row():\n", + " submit_tc = gr.Button(\"Generate Dataset\")\n", + "\n", + " tc_output = gr.Textbox(label=\"AI Response\", lines=10)\n", + " tc_download_link = gr.File(label=\"Download Generated Dataset (.csv)\", visible=False)\n", + "\n", + " # Event Handlers\n", + " dataset_type.change(\n", + " update_params,\n", + " inputs=[dataset_type],\n", + " outputs=[ir_container, mtd_container, tc_container]\n", + " )\n", + "\n", + " submit_ir.click(\n", + " create_dataset_ir,\n", + " inputs=[dataset_type, target_domain, instruction_type, diversity_prompt, number_of_samples_ir],\n", + " outputs=[ir_output, ir_download_link],\n", + " )\n", + "\n", + " submit_mtd.click(\n", + " create_dataset_mtd,\n", + " inputs=[dataset_type, scenario_role, number_of_turns, conversation_goal, number_of_samples_mtd],\n", + " outputs=[mtd_output, mtd_download_link],\n", + " )\n", + "\n", + " submit_tc.click(\n", + " create_dataset_tc,\n", + " inputs=[dataset_type, text_type, list_of_labels, number_of_samples_tc],\n", + " outputs=[tc_output, tc_download_link],\n", + " )\n", + "\n", + "# Launch with share=True for Google Colab\n", + "demo.launch(debug=True, share=True)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyONLEGpVh5gB9OpjnkJUf32", + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "02bd1f7a767140ad9b9aa00b0f310b2b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03020848cfc34cdea1c65de137c624a7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "06118d97f4ec45559a7851005f1cb4e1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0640dbea14a745fca2f6fa5f962ac775": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0700a6f52fcd48d99f2c995b9474c963": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0729eec1581f4b4197f59906685ab73a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dd0a074bd2d6426e9aea514e0a96a078", + "placeholder": "​", + "style": "IPY_MODEL_1f56cf9a7d5b4ba4889f302c909b889f", + "value": " 296/296 [00:00<00:00, 12.3kB/s]" + } + }, + "0885d35288444f80a4472c4dc759cb1b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2b83b912332742daa28ab4a657380a40", + "placeholder": "​", + "style": "IPY_MODEL_c38dd8628d2e4869a5ce5338c87a638f", + "value": " 5.00G/5.00G [02:58<00:00, 53.9MB/s]" + } + }, + "08a797751c55468aac4bdfbfee0d4d37": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d9dc0681f3ec4f0cbb82562967f126d5", + "placeholder": "​", + "style": "IPY_MODEL_ae30150e12df4320b9048cb03354ef5f", + "value": "config.json: 100%" + } + }, + "08ddb7a3a5094d5ea3de669f0e03d462": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0c32754d08584330abdf057ef5fde573": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_81324aba1d6a4a919aeddf13ace64b4a", + "IPY_MODEL_a17391b90bc64d49b80922037401fb64", + "IPY_MODEL_4da1b4471c194bda9ea105fb25769eac" + ], + "layout": "IPY_MODEL_5cd3b1a4fbcc4f0a910c5f1e1e250bf0" + } + }, + "10eb570b260b44e6a7d42b6332b897bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6a1718c4158a4df1a4cafba544727cca", + "max": 184, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d0b16c25ae5546a5ba491ff761206ae5", + "value": 184 + } + }, + "116c89c60a014cadbb281ba93a021cb7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "15f33f0f77c641aba861879ddb66f655": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1731cb921abc41acb30cf18028cf95f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "180ada3f5686478caff55bea4844ad82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7047676eed1142a5bf43d6bdced7e8c2", + "placeholder": "​", + "style": "IPY_MODEL_252d318144ca4a2386ae149cd4995107", + "value": " 855/855 [00:00<00:00, 34.4kB/s]" + } + }, + "18106bb7478c492e982b2485055d12a5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "184dbc08613944b2bc40e90fe33d99b9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "1b2837fb0147452a937b35ab8c585ade": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cf3b4bfb03324c5a981041f866aed437", + "placeholder": "​", + "style": "IPY_MODEL_9cb0c3e048bc467fa53e4dc1383337fb", + "value": " 1.17G/1.17G [00:19<00:00, 101MB/s]" + } + }, + "1be17ce8b78141c687c21ce194bcc763": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"1f56cf9a7d5b4ba4889f302c909b889f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "233da03e51fc4abd835b4f740e49ac92": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "252d318144ca4a2386ae149cd4995107": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "265f3a5a0b2d4cabab4f78bdedd6b957": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2b83b912332742daa28ab4a657380a40": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2bd50972a522409398a22ae76b74eb15": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1731cb921abc41acb30cf18028cf95f9", + "max": 4, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_08ddb7a3a5094d5ea3de669f0e03d462", + "value": 4 + } + }, + "2d45fecd748e43b899dd99f00b436ed6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2dd4d35ebc8c4b9dafedda3b546fc11d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "31cf47d5aec34a2b97d9efbba84da4e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", 
+ "description_tooltip": null, + "layout": "IPY_MODEL_cf00d92f58174d279f65b2fa9e6fc29f", + "placeholder": "​", + "style": "IPY_MODEL_2d45fecd748e43b899dd99f00b436ed6", + "value": "model-00003-of-00004.safetensors: 100%" + } + }, + "346e0fb21dac44c9ac77bd495244183c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "34e150d0acb748da98fb50f5372c57bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c66628bfed614e039314a2ab42340d6f", + "max": 4, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e0425d229f5f462e9367c09524e6545f", + "value": 4 + } + }, + "362043576d8444ccbe7dfb2796fb2511": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "36b3dd197efe44eb8d111b1ab1e31585": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44d4522449874c7cab58e83fd7627903": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "45458cf2b8694deba8736af6fd57e8e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "46e335b5ef4e4dc19d4812b214392e16": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "48e600d9711540568e4de524e2993f4d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "49a00770f8fe4ac195311535ee4901af": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4aa823c0e15e40b8b328f6f9de3f7771": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4da1b4471c194bda9ea105fb25769eac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_02bd1f7a767140ad9b9aa00b0f310b2b", + "placeholder": "​", + "style": "IPY_MODEL_d397a857b7174694bf35f6a0bf258509", + "value": " 23.9k/23.9k [00:00<00:00, 1.01MB/s]" + } + }, + "4dcf7eb450a44f3aa98730b43030b0ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e3220619a2b4c358a9772de3a399bec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_362043576d8444ccbe7dfb2796fb2511", + "max": 855, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8144ecf2be9642308f86c57418490d18", + "value": 855 + } + }, + "559305d2cbbb4df3a8cf9f5fd3c2a43d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a772ad500ba440169299ebfa61f19cd2", + "IPY_MODEL_a687ec74cac4401f81ea29ee51db6cf4", + "IPY_MODEL_0885d35288444f80a4472c4dc759cb1b" + ], + "layout": "IPY_MODEL_184dbc08613944b2bc40e90fe33d99b9" + } + }, + "56c770bcdb944fbe86a159b9234e778e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5797f033785c486ea415d76938605ed2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5ba7e060c17f4c509c4e258e3a5e3211": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_31cf47d5aec34a2b97d9efbba84da4e5", + "IPY_MODEL_fd2a719ccbb44d769fa40282835f8286", + "IPY_MODEL_75f4741409a24842895649af0218d239" + ], + "layout": "IPY_MODEL_87b474e2d1a9424fa44498e1a09e4600" + } + }, + "5cd3b1a4fbcc4f0a910c5f1e1e250bf0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f6804815ea94c229004b75300d6434e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6027176c5c824977be67e7a135c0184b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9f66fffe75b54a5c8a0d0cd0647460d4", + "placeholder": "​", + "style": "IPY_MODEL_851db003a3f44e609e2a50555ed15a6b", + "value": " 
184/184 [00:00<00:00, 17.0kB/s]" + } + }, + "6037d1b48d6144b7a23c2422d07f2184": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "606541fab5144826b8ec524b52793023": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "60770906b5b14eefa6e2c35a4993f944": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0700a6f52fcd48d99f2c995b9474c963", + "placeholder": "​", + "style": "IPY_MODEL_873a8cdf09734e7882ee1503e2aeb7c1", + "value": " 4/4 [01:10<00:00, 15.16s/it]" + } + }, + "624bb914918f4f1b972d0310030cc29a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6a0c785835d64ccfbe5c97f1ed647893": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_15f33f0f77c641aba861879ddb66f655", + "max": 4976698672, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6037d1b48d6144b7a23c2422d07f2184", + "value": 4976698672 + } + }, + "6a1718c4158a4df1a4cafba544727cca": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6aefe246643c479280124257988487d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f35ea75c598e41eb9953f3f908577faa", + "IPY_MODEL_34e150d0acb748da98fb50f5372c57bc", + "IPY_MODEL_60770906b5b14eefa6e2c35a4993f944" + ], + "layout": "IPY_MODEL_91f21566f5d34ca785a18b3b372a510c" + } + }, + "6ce16005dd4941f296cfc225e65e6517": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7047676eed1142a5bf43d6bdced7e8c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71d9920f7c004a36908c35653c072690": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "733b3148561e4b67aff5cfe82c168bb2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "744ef0af763041b69cc272bd549280a2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fea2a012f93d4830992a69d7f970f738", + "IPY_MODEL_75ebc5599c744a0a86ab66fcb7bc5639", + "IPY_MODEL_9080517ad5b5457e83425628cc1f7391" + ], + "layout": "IPY_MODEL_bbb47fb60e554cabbdcfd524e6558818" + } + }, + "7494d5c09ddb4259873b6e463adedbaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": 
null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "75ebc5599c744a0a86ab66fcb7bc5639": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_af70eeabfa784e1f8b51be4feb13fad2", + "max": 55351, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0640dbea14a745fca2f6fa5f962ac775", + "value": 55351 + } + }, + "75f4741409a24842895649af0218d239": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44d4522449874c7cab58e83fd7627903", + "placeholder": "​", + "style": "IPY_MODEL_8394d3bb4d60475f9e64f78c3dc4c56a", + "value": " 4.92G/4.92G [03:21<00:00, 96.2MB/s]" + } + }, + 
"77914334d6114da2af45add566b6f5c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7c1a57d1a6c243a09ce43e68669ad757": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56c770bcdb944fbe86a159b9234e778e", + "placeholder": "​", + "style": "IPY_MODEL_a6d76986d203472190863f12fcf83e7b", + "value": " 4/4 [08:39<00:00, 114.14s/it]" + } + }, + "81324aba1d6a4a919aeddf13ace64b4a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_624bb914918f4f1b972d0310030cc29a", + "placeholder": "​", + "style": "IPY_MODEL_5797f033785c486ea415d76938605ed2", + "value": "model.safetensors.index.json: 100%" + } + }, + "8144ecf2be9642308f86c57418490d18": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "81838418eb6743e2b6d4bb274514c157": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8394d3bb4d60475f9e64f78c3dc4c56a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "851db003a3f44e609e2a50555ed15a6b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "873a8cdf09734e7882ee1503e2aeb7c1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "87b474e2d1a9424fa44498e1a09e4600": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8cefe6b259324768837e84e704862876": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9080517ad5b5457e83425628cc1f7391": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c8c2f465ffe14afc99a2ab529189208a", 
+ "placeholder": "​", + "style": "IPY_MODEL_bcf0d83c6828416681d05d97d342ba7f", + "value": " 55.4k/55.4k [00:00<00:00, 2.66MB/s]" + } + }, + "91f21566f5d34ca785a18b3b372a510c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "93604978de264369af6cad9606e3af9e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48e600d9711540568e4de524e2993f4d", + "placeholder": "​", + "style": 
"IPY_MODEL_b336bc5c759e443184406160a1ded0ce", + "value": "special_tokens_map.json: 100%" + } + }, + "936883d594dd4e1c910579b998de29bd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9cb0c3e048bc467fa53e4dc1383337fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f5f8656edca4ab693c662d7f2bf75ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_08a797751c55468aac4bdfbfee0d4d37", + "IPY_MODEL_4e3220619a2b4c358a9772de3a399bec", + "IPY_MODEL_180ada3f5686478caff55bea4844ad82" + ], + "layout": "IPY_MODEL_606541fab5144826b8ec524b52793023" + } + }, + "9f66fffe75b54a5c8a0d0cd0647460d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a17391b90bc64d49b80922037401fb64": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36b3dd197efe44eb8d111b1ab1e31585", + "max": 23950, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a696bd2c5a8e464494112c298946b3e1", + "value": 23950 + } + }, + "a1d1a045be664a1784191d2f4dfb12e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_18106bb7478c492e982b2485055d12a5", + "max": 1168138808, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e9445fdbd5f640e2b7dacb72d3428057", + "value": 1168138808 + } + }, + "a45de69949ff4a81a6fd4604a2b4678e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ab907069ad414d02952a30a0a014cb73", + "IPY_MODEL_2bd50972a522409398a22ae76b74eb15", + "IPY_MODEL_7c1a57d1a6c243a09ce43e68669ad757" + ], + "layout": "IPY_MODEL_c9f2a0612c144ab5a941084b762ce3e2" + } + }, + "a5cf1251bf4c4023b6dc3e1878740085": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a687ec74cac4401f81ea29ee51db6cf4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_81838418eb6743e2b6d4bb274514c157", + "max": 4999802720, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5f6804815ea94c229004b75300d6434e", + "value": 4999802720 + } + }, + "a696bd2c5a8e464494112c298946b3e1": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a6d76986d203472190863f12fcf83e7b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a772ad500ba440169299ebfa61f19cd2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6ce16005dd4941f296cfc225e65e6517", + "placeholder": "​", + "style": "IPY_MODEL_45458cf2b8694deba8736af6fd57e8e6", + "value": "model-00002-of-00004.safetensors: 100%" + } + }, + "ab907069ad414d02952a30a0a014cb73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_a5cf1251bf4c4023b6dc3e1878740085", + "placeholder": "​", + "style": "IPY_MODEL_46e335b5ef4e4dc19d4812b214392e16", + "value": "Downloading shards: 100%" + } + }, + "ad2d2da8f0c942508463f22179819ad9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_93604978de264369af6cad9606e3af9e", + "IPY_MODEL_e82012bce3b04a60926f6bccf50b0c5e", + "IPY_MODEL_0729eec1581f4b4197f59906685ab73a" + ], + "layout": "IPY_MODEL_2dd4d35ebc8c4b9dafedda3b546fc11d" + } + }, + "ada15f657b5549f3a0ebd42c87edae53": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e2bc87a955534be495f05e9bc1b45029", + "IPY_MODEL_cda6d00d5daf4f06a58f36655b431293", + "IPY_MODEL_b663362b43ed4e47b5daad7414b2db06" + ], + "layout": "IPY_MODEL_03020848cfc34cdea1c65de137c624a7" + } + }, + "ae30150e12df4320b9048cb03354ef5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "af70eeabfa784e1f8b51be4feb13fad2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b336bc5c759e443184406160a1ded0ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b529628f69c5467294a9bd230210f1dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_06118d97f4ec45559a7851005f1cb4e1", + "placeholder": "​", + "style": "IPY_MODEL_e986b132445440429a6d77bebf031eea", + "value": "model-00004-of-00004.safetensors: 100%" + } + }, + "b61e3d318e4b44b0890ac56e787892c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b663362b43ed4e47b5daad7414b2db06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_936883d594dd4e1c910579b998de29bd", + "placeholder": "​", + "style": "IPY_MODEL_ec2f50127c5f461b892cd1cce3823dec", + "value": " 9.09M/9.09M [00:00<00:00, 18.6MB/s]" + } + }, + "bbb47fb60e554cabbdcfd524e6558818": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bcf0d83c6828416681d05d97d342ba7f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c1e39012677246a09e42d20733c9e174": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": 
null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c38dd8628d2e4869a5ce5338c87a638f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c3e6c70c163a45439ba12ef559a0e6c6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, 
+ "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c66628bfed614e039314a2ab42340d6f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c8c2f465ffe14afc99a2ab529189208a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c9f2a0612c144ab5a941084b762ce3e2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "cbc8acacf1024ebfb8db8fec7d3601ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cda6d00d5daf4f06a58f36655b431293": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7494d5c09ddb4259873b6e463adedbaa", + "max": 9085657, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b61e3d318e4b44b0890ac56e787892c5", + "value": 9085657 + } + }, + "cec143ff825146659d71076cc4f238a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b529628f69c5467294a9bd230210f1dd", + "IPY_MODEL_a1d1a045be664a1784191d2f4dfb12e5", + "IPY_MODEL_1b2837fb0147452a937b35ab8c585ade" + ], + "layout": "IPY_MODEL_4aa823c0e15e40b8b328f6f9de3f7771" + } + }, + "cf00d92f58174d279f65b2fa9e6fc29f": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf3b4bfb03324c5a981041f866aed437": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d0b16c25ae5546a5ba491ff761206ae5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d397a857b7174694bf35f6a0bf258509": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d9dc0681f3ec4f0cbb82562967f126d5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd0a074bd2d6426e9aea514e0a96a078": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "deacaa0a0a314886bce184524764215d": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e0425d229f5f462e9367c09524e6545f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e2bc87a955534be495f05e9bc1b45029": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1be17ce8b78141c687c21ce194bcc763", + "placeholder": "​", + "style": "IPY_MODEL_deacaa0a0a314886bce184524764215d", + "value": "tokenizer.json: 100%" + } + }, + "e3a77622925b4ce4986af76f3fb292ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_71d9920f7c004a36908c35653c072690", + "placeholder": "​", + "style": "IPY_MODEL_233da03e51fc4abd835b4f740e49ac92", + "value": " 4.98G/4.98G [01:59<00:00, 66.1MB/s]" + } + }, + "e5939b22aecd47168c7e7da7daec611e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e82012bce3b04a60926f6bccf50b0c5e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4dcf7eb450a44f3aa98730b43030b0ac", + "max": 296, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_346e0fb21dac44c9ac77bd495244183c", + "value": 296 + } + }, + "e9445fdbd5f640e2b7dacb72d3428057": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e986b132445440429a6d77bebf031eea": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ea47a5153ad445f3a28820d6c77fefff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ec2f50127c5f461b892cd1cce3823dec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ece91278c6df45d39e347a02842173fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3e6c70c163a45439ba12ef559a0e6c6", + "placeholder": "​", + "style": "IPY_MODEL_cbc8acacf1024ebfb8db8fec7d3601ce", + "value": "generation_config.json: 100%" + } + }, + "ed0cc53b0df34c0e8bf230bc68ae4410": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ece91278c6df45d39e347a02842173fa", + "IPY_MODEL_10eb570b260b44e6a7d42b6332b897bf", + "IPY_MODEL_6027176c5c824977be67e7a135c0184b" + ], + "layout": "IPY_MODEL_49a00770f8fe4ac195311535ee4901af" + } + }, + "f173fc0b2cd740be83d262e1bf9dbe08": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fc00ecc3f0b04687ab7ae379ff2d2bdc", + 
"placeholder": "​", + "style": "IPY_MODEL_265f3a5a0b2d4cabab4f78bdedd6b957", + "value": "model-00001-of-00004.safetensors: 100%" + } + }, + "f35ea75c598e41eb9953f3f908577faa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_116c89c60a014cadbb281ba93a021cb7", + "placeholder": "​", + "style": "IPY_MODEL_e5939b22aecd47168c7e7da7daec611e", + "value": "Loading checkpoint shards: 100%" + } + }, + "f785f0b858d84f2385bd94e8d67cddf0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f173fc0b2cd740be83d262e1bf9dbe08", + "IPY_MODEL_6a0c785835d64ccfbe5c97f1ed647893", + "IPY_MODEL_e3a77622925b4ce4986af76f3fb292ed" + ], + "layout": "IPY_MODEL_c1e39012677246a09e42d20733c9e174" + } + }, + "fc00ecc3f0b04687ab7ae379ff2d2bdc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd2a719ccbb44d769fa40282835f8286": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8cefe6b259324768837e84e704862876", + "max": 4915916176, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_733b3148561e4b67aff5cfe82c168bb2", + "value": 4915916176 + } + }, + "fea2a012f93d4830992a69d7f970f738": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_ea47a5153ad445f3a28820d6c77fefff", + "placeholder": "​", + "style": "IPY_MODEL_77914334d6114da2af45add566b6f5c5", + "value": "tokenizer_config.json: 100%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From e4fd46ad672f62086a84f2b82d65ce44d995e268 Mon Sep 17 00:00:00 2001 From: Uday Slathia <127138307+udayslathia16@users.noreply.github.com> Date: Thu, 9 Oct 2025 12:45:04 +0530 Subject: [PATCH 3/5] Add files via upload --- .../Synthetic Dataset Generator/README.md | 251 +++++++++ .../Synthetic Dataset Generator/app.ipynb | 502 ++++++++++++++++++ .../requirements.txt | 5 + 3 files changed, 758 insertions(+) create mode 100644 week3/community-contributions/Synthetic Dataset Generator/README.md create mode 100644 week3/community-contributions/Synthetic Dataset Generator/app.ipynb create mode 100644 week3/community-contributions/Synthetic Dataset Generator/requirements.txt diff --git a/week3/community-contributions/Synthetic Dataset Generator/README.md b/week3/community-contributions/Synthetic Dataset Generator/README.md new file mode 100644 index 0000000..9c46342 --- /dev/null +++ b/week3/community-contributions/Synthetic Dataset Generator/README.md @@ -0,0 +1,251 @@ +# 🤖 Synthetic Dataset Generator +## AI-Powered Synthetic Data Generation with Claude 3 Haiku +## 📥 Installation + +### 1️⃣ Clone the Repository + +```bash +git clone https://github.com/yourusername/synthetic-dataset-generator.git +cd synthetic-dataset-generator +``` + +### 2️⃣ Create Virtual Environment (Recommended) + +```bash +# Windows +python -m venv venv +venv\Scripts\activate + +# macOS/Linux +python3 -m venv venv +source venv/bin/activate +``` + +### 3️⃣ Install Dependencies + +```bash +pip install -r requirements.txt +``` + +**Requirements file (`requirements.txt`):** +```txt +gradio>=4.0.0 +anthropic>=0.25.0 +pandas>=1.5.0 +python-dotenv>=1.0.0 +httpx==0.27.2 +``` + +### 4️⃣ Set Up API Key + +Create a `.env` file in the project root: + +```bash +# .env 
+ANTHROPIC_API_KEY=your_api_key_here +``` + +> **Note**: Never commit your `.env` file to version control. Add it to `.gitignore`. + +--- + +## 🚀 Usage + +### Running the Application + +```bash +jupyter notebook app.ipynb +``` + +Run all cells; the Gradio interface will then launch at `http://localhost:7860` + +### Basic Workflow + +1. **Enter API Key** (if not in `.env`) +2. **Describe Your Schema** in plain English +3. **Set Number of Records** (1-200) +4. **Add Example Format** (optional, but recommended) +5. **Click Generate** 🎉 +6. **Download CSV** when ready + +--- + +## 📝 Example Schemas + +### 👥 Customer Data +``` +Generate customer data with: +- customer_id (format: CUST-XXXX) +- name (full name) +- email (valid email address) +- age (between 18-80) +- city (US cities) +- purchase_amount (between $10-$1000) +- join_date (dates in 2023-2024) +- subscription_type (Free, Basic, Premium) +``` + +### 👨‍💼 Employee Records +``` +Generate employee records with: +- employee_id (format: EMP001, EMP002, etc.) +- name (full name) +- department (Engineering, Sales, Marketing, HR, Finance) +- salary (between $40,000-$150,000) +- hire_date (between 2020-2024) +- performance_rating (1-5) +- is_remote (true/false) +``` + +### 🛒 E-commerce Products +``` +Generate e-commerce product data with: +- product_id (format: PRD-XXXX) +- product_name (creative product names) +- category (Electronics, Clothing, Home, Books, Sports) +- price (between $5-$500) +- stock_quantity (between 0-1000) +- rating (1.0-5.0) +- num_reviews (0-500) +- in_stock (true/false) +``` + +--- + +## 🎯 Advanced Usage + +### Batch Generation + +For datasets larger than 50 records, the tool automatically: +- Splits generation into batches of 50 +- Combines results into a single dataset +- Prevents API timeout issues + +### Custom Formats + +Provide example JSON to guide the output format: + +```json +{ + "id": "USR-001", + "name": "Jane Smith", + "email": "jane.smith@example.com", + "created_at": "2024-01-15T10:30:00Z" +} +``` + +--- + +## 
🔧 Troubleshooting + +### ❌ Error: `proxies` keyword argument + +**Solution**: Downgrade httpx to a compatible version + +```bash +pip install "httpx==0.27.2" +``` + +Then restart your Python kernel/terminal. + +### ❌ API Key Not Found + +**Solutions**: +1. Check that the `.env` file exists in the project root +2. Verify `ANTHROPIC_API_KEY` is spelled correctly +3. Ensure no extra spaces in the `.env` file +4. Restart the application after creating `.env` + +### ❌ JSON Parsing Error + +**Solutions**: +1. Make your schema description more specific +2. Add an example format +3. Reduce the number of records per batch +4. Check that your API key has sufficient credits + +### ❌ Rate Limit Errors + +**Solutions**: +1. Reduce batch size in code (change `batch_size=50` to `batch_size=20`) +2. Add delays between batches +3. Upgrade your Anthropic API plan + +--- + +## 📊 Output Format + +### DataFrame Preview +View generated data directly in the browser in a scrollable table. + +### CSV Download +- Automatic CSV generation +- Proper encoding (UTF-8) +- No index column +- Ready for Excel, Pandas, or any data tool + +--- + +## 🧑‍💻 Skill Level + +**Beginner Friendly** ✅ + +- No ML/AI expertise required +- Basic Python knowledge helpful +- Simple natural language interface +- Pre-configured examples included + +--- + +## 💡 Tips for Best Results + +1. **Be Specific**: Include data types, ranges, and formats +2. **Use Examples**: Provide sample JSON for complex schemas +3. **Start Small**: Test with 5-10 records before scaling up +4. **Iterate**: Refine your schema based on initial results +5. **Validate**: Check the first few records before using the entire dataset + +--- + +## 🤝 Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +1. Fork the repository +2. Create your feature branch +3. Commit your changes +4. Push to the branch +5. 
Open a Pull Request + +--- + + +## 🙏 Acknowledgments + +- **Anthropic** for the Claude API +- **Gradio** for the UI framework +- **Pandas** for data manipulation + +--- + +## 📞 Support + +- 📧 Email: udayslathia16@gmail.com + +--- + +## 🔗 Related Projects + +- [Claude API Documentation](https://docs.anthropic.com/) +- [Gradio Documentation](https://gradio.app/docs/) +- [Pandas Documentation](https://pandas.pydata.org/) + +--- + +
+ +**Made with ❤️ using Claude 3 Haiku** + +⭐ Star this repo if you find it useful! + +
\ No newline at end of file diff --git a/week3/community-contributions/Synthetic Dataset Generator/app.ipynb b/week3/community-contributions/Synthetic Dataset Generator/app.ipynb new file mode 100644 index 0000000..4773307 --- /dev/null +++ b/week3/community-contributions/Synthetic Dataset Generator/app.ipynb @@ -0,0 +1,502 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1faf5626-864e-4287-af11-535f9a3f59ae", + "metadata": {}, + "source": [ + "# 🤖 Synthetic Dataset Generator\n", + "## AI-Powered Synthetic Data Generation with Claude 3 Haiku\n", + "Generate custom synthetic datasets by simply describing your data schema. This tool uses Claude 3 Haiku to create realistic, diverse datasets for testing, development, and prototyping.\n", + "\n", + "![Alt text](https://img.shields.io/badge/python-3.10+-blue.svg) ![Alt text](https://img.shields.io/badge/License-MIT-yellow.svg) \n", + "\n", + "## ✨ Features\n", + "\n", + "- 🎯 Schema-Based Generation - Describe your data structure in plain English\n", + "- 🚀 Fast & Efficient - Powered by Claude 3 Haiku for cost-effective generation\n", + "- 📊 Batch Processing - Automatically splits larger requests (up to 200 records) into batches\n", + "- 💾 Export Ready - Download as CSV for immediate use\n", + "- 🎨 User-Friendly UI - Built with Gradio for easy interaction\n", + "- 🔒 Secure - API key management via .env files\n", + "- 📝 Built-in Examples - Pre-configured schemas for common use cases\n", + "\n", + "## 🌍 Use Cases\n", + "\n", + "+ 🧪 Testing & Development - Generate test data for applications\n", + "+ 📈 Data Science - Create training datasets for ML models\n", + "+ 🎓 Education - Generate sample datasets for learning\n", + "+ 🏢 Prototyping - Quick data mockups for demos\n", + "+ 🔬 Research - Synthetic data for experiments\n", + "\n", + "## 🧠 Model\n", + "\n", + "- AI Model: Anthropic's claude-3-haiku-20240307\n", + "- Task: Structured data generation based on natural language schemas\n", + "- Output Format: JSON arrays converted to 
Pandas DataFrames and CSV\n", + "\n", + "## 🛠️ Requirements\n", + "### ⚙️ Hardware\n", + "\n", + "- ✅ CPU is sufficient — No GPU required\n", + "- 💾 Minimal RAM (2GB+)\n", + "\n", + "### 📦 Software\n", + "\n", + "- Python 3.8 or higher\n", + "- Anthropic API Key\n", + "\n", + "### See `README.md` for help with errors" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "7ece01a4-0676-4176-86b9-91b0be3a9786", + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "import json\n", + "import pandas as pd\n", + "from typing import List, Dict\n", + "import os\n", + "from dotenv import load_dotenv\n", + "import tempfile" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "01665d8a-c483-48c7-92e1-0d92ca4c9731", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load environment variables from .env file\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3cf53df7-175a-46b0-8508-a8ae34afb65b", + "metadata": {}, + "outputs": [], + "source": [ + "# Get API key from environment\n", + "ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "53a0686e-26c7-49c0-b048-a113be756c7c", + "metadata": {}, + "outputs": [], + "source": [ + "# Import anthropic after other imports to avoid conflicts\n", + "try:\n", + " from anthropic import Anthropic, APIError\n", + "except ImportError:\n", + " import anthropic\n", + " Anthropic = anthropic.Anthropic\n", + " APIError = anthropic.APIError\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "5f9cb807-ad4c-45b1-bedf-d342a14ebe4a", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize Anthropic client\n", + "def create_client(api_key: str):\n", + " \"\"\"Create Anthropic client with proper initialization\"\"\"\n", 
+ " try:\n", + " # Try normal initialization\n", + " return Anthropic(api_key=api_key)\n", + " except TypeError as e:\n", + " if 'proxies' in str(e):\n", + " # Workaround for httpx version mismatch\n", + " import httpx\n", + " # Create a basic httpx client without proxies\n", + " http_client = httpx.Client()\n", + " return Anthropic(api_key=api_key, http_client=http_client)\n", + " else:\n", + " raise e\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "dea61271-a138-4f9b-979e-77a998a6950c", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_synthetic_data(\n", + " api_key: str,\n", + " schema_description: str,\n", + " num_records: int,\n", + " example_format: str = \"\"\n", + ") -> tuple:\n", + " \"\"\"\n", + " Generate synthetic dataset using Claude 3 Haiku\n", + " \n", + " Args:\n", + " api_key: Anthropic API key\n", + " schema_description: Description of the data schema\n", + " num_records: Number of records to generate\n", + " example_format: Optional example of desired format\n", + " \n", + " Returns:\n", + " tuple: (DataFrame, status message, csv_file_path)\n", + " \"\"\"\n", + " try:\n", + " # Create client\n", + " client = create_client(api_key)\n", + " \n", + " # Construct the prompt\n", + " example_section = f\"\\n\\nExample format:\\n{example_format}\" if example_format else \"\"\n", + " \n", + " prompt = f\"\"\"Generate {num_records} synthetic data records based on the following schema:\n", + "\n", + "{schema_description}{example_section}\n", + "\n", + "Requirements:\n", + "1. Return ONLY a valid JSON array of objects\n", + "2. Each object should be one record matching the schema\n", + "3. Make the data realistic and diverse\n", + "4. Ensure data types are appropriate (strings, numbers, booleans, dates, etc.)\n", + "5. 
Do not include any explanation, only the JSON array\n", + "\n", + "Generate exactly {num_records} records.\"\"\"\n", + "\n", + " # Call Claude API with explicit parameters\n", + " message = client.messages.create(\n", + " model=\"claude-3-haiku-20240307\",\n", + " max_tokens=4096,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " )\n", + " \n", + " # Extract the response\n", + " response_text = message.content[0].text\n", + " \n", + " # Try to parse JSON from the response\n", + " # Sometimes Claude might wrap it in markdown code blocks\n", + " if \"```json\" in response_text:\n", + " json_str = response_text.split(\"```json\")[1].split(\"```\")[0].strip()\n", + " elif \"```\" in response_text:\n", + " json_str = response_text.split(\"```\")[1].split(\"```\")[0].strip()\n", + " else:\n", + " json_str = response_text.strip()\n", + " \n", + " # Parse JSON\n", + " data = json.loads(json_str)\n", + " \n", + " # Convert to DataFrame\n", + " df = pd.DataFrame(data)\n", + " \n", + " # Save to temporary CSV file with proper path\n", + " fd, temp_path = tempfile.mkstemp(suffix='.csv', prefix='synthetic_data_')\n", + " os.close(fd) # Close the file descriptor\n", + " \n", + " # Write CSV to the temp file\n", + " df.to_csv(temp_path, index=False)\n", + " \n", + " status = f\"✅ Successfully generated {len(df)} records!\"\n", + " return df, status, temp_path\n", + " \n", + " except json.JSONDecodeError as e:\n", + " return None, f\"❌ Error parsing JSON: {str(e)}\\n\\nResponse received:\\n{response_text[:500] if 'response_text' in locals() else 'N/A'}...\", None\n", + " except APIError as e:\n", + " return None, f\"❌ API Error: {str(e)}\", None\n", + " except Exception as e:\n", + " return None, f\"❌ Error: {type(e).__name__}: {str(e)}\", None" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "aa95c2aa-ac99-4919-94bd-981cb7bd42b7", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_batch_data(\n", + " api_key: 
str,\n", + " schema_description: str,\n", + " total_records: int,\n", + " example_format: str = \"\",\n", + " batch_size: int = 50\n", + ") -> tuple:\n", + " \"\"\"\n", + " Generate larger datasets in batches\n", + " \"\"\"\n", + " all_data = []\n", + " batches = (total_records + batch_size - 1) // batch_size\n", + " \n", + " for i in range(batches):\n", + " records_in_batch = min(batch_size, total_records - len(all_data))\n", + " df_batch, status, csv_path = generate_synthetic_data(\n", + " api_key, schema_description, records_in_batch, example_format\n", + " )\n", + " \n", + " if df_batch is not None:\n", + " all_data.extend(df_batch.to_dict('records'))\n", + " else:\n", + " return None, f\"❌ Error in batch {i+1}: {status}\", None\n", + " \n", + " final_df = pd.DataFrame(all_data)\n", + " \n", + " # Save final CSV with proper temp file handling\n", + " fd, temp_path = tempfile.mkstemp(suffix='.csv', prefix='synthetic_data_batch_')\n", + " os.close(fd)\n", + " \n", + " final_df.to_csv(temp_path, index=False)\n", + " \n", + " status = f\"✅ Successfully generated {len(final_df)} records in {batches} batches!\"\n", + " return final_df, status, temp_path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "b73aff00-c0c0-43d4-96a9-43b0cd84de2b", + "metadata": {}, + "outputs": [], + "source": [ + "# Create Gradio Interface\n", + "def create_interface():\n", + " with gr.Blocks(title=\"Synthetic Dataset Generator\", theme=gr.themes.Soft()) as demo:\n", + " gr.Markdown(\"\"\"\n", + " # 🤖 Synthetic Dataset Generator\n", + " ### Powered by Claude 3 Haiku\n", + " \n", + " Create custom synthetic datasets by describing your schema. 
Claude will generate realistic data matching your specifications.\n", + " \"\"\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " # Show API key input only if not found in environment\n", + " if not ANTHROPIC_API_KEY:\n", + " api_key_input = gr.Textbox(\n", + " label=\"Anthropic API Key\",\n", + " type=\"password\",\n", + " placeholder=\"sk-ant-...\",\n", + " info=\"API key not found in .env file\"\n", + " )\n", + " else:\n", + " api_key_input = gr.Textbox(\n", + " label=\"Anthropic API Key\",\n", + " type=\"password\",\n", + " value=ANTHROPIC_API_KEY,\n", + " placeholder=\"Loaded from .env\",\n", + " info=\"✅ API key loaded from environment\",\n", + " interactive=False\n", + " )\n", + " \n", + " schema_input = gr.Textbox(\n", + " label=\"Data Schema Description\",\n", + " placeholder=\"\"\"Example: Generate customer data with:\n", + "- name (full name)\n", + "- email (valid email address)\n", + "- age (between 18-80)\n", + "- city (US cities)\n", + "- purchase_amount (between $10-$1000)\n", + "- join_date (dates in 2023-2024)\"\"\",\n", + " lines=10\n", + " )\n", + " \n", + " example_input = gr.Textbox(\n", + " label=\"Example Format (Optional)\",\n", + " placeholder=\"\"\"{\"name\": \"John Doe\", \"email\": \"john@example.com\", \"age\": 35, \"city\": \"New York\", \"purchase_amount\": 299.99, \"join_date\": \"2023-05-15\"}\"\"\",\n", + " lines=4\n", + " )\n", + " \n", + " num_records = gr.Slider(\n", + " minimum=1,\n", + " maximum=200,\n", + " value=10,\n", + " step=1,\n", + " label=\"Number of Records\"\n", + " )\n", + " \n", + " generate_btn = gr.Button(\"🚀 Generate Dataset\", variant=\"primary\")\n", + " \n", + " with gr.Column(scale=2):\n", + " status_output = gr.Textbox(label=\"Status\", lines=2)\n", + " dataframe_output = gr.Dataframe(\n", + " label=\"Generated Dataset\",\n", + " wrap=True\n", + " )\n", + " csv_output = gr.File(label=\"Download CSV\", type=\"filepath\")\n", + " \n", + " # Examples\n", + " gr.Markdown(\"### 📝 
Example Schemas\")\n", + " gr.Examples(\n", + " examples=[\n", + " [\n", + " \"\"\"Generate employee records with:\n", + "- employee_id (format: EMP001, EMP002, etc.)\n", + "- name (full name)\n", + "- department (Engineering, Sales, Marketing, HR, Finance)\n", + "- salary (between $40,000-$150,000)\n", + "- hire_date (between 2020-2024)\n", + "- performance_rating (1-5)\"\"\",\n", + " 10\n", + " ],\n", + " [\n", + " \"\"\"Generate e-commerce product data with:\n", + "- product_id (format: PRD-XXXX)\n", + "- product_name (creative product names)\n", + "- category (Electronics, Clothing, Home, Books, Sports)\n", + "- price (between $5-$500)\n", + "- stock_quantity (between 0-1000)\n", + "- rating (1.0-5.0)\n", + "- num_reviews (0-500)\"\"\",\n", + " 15\n", + " ],\n", + " [\n", + " \"\"\"Generate student records with:\n", + "- student_id (format: STU2024XXX)\n", + "- name (full name)\n", + "- major (Computer Science, Biology, Business, Arts, Engineering)\n", + "- gpa (2.0-4.0)\n", + "- year (Freshman, Sophomore, Junior, Senior)\n", + "- credits_completed (0-120)\"\"\",\n", + " 20\n", + " ]\n", + " ],\n", + " inputs=[schema_input, num_records]\n", + " )\n", + " \n", + " def generate_wrapper(api_key, schema, num_rec, example):\n", + " # Use environment API key if available, otherwise use input\n", + " final_api_key = ANTHROPIC_API_KEY or api_key\n", + " \n", + " if not final_api_key:\n", + " return None, \"❌ Please provide your Anthropic API key (either in .env file or input field)\", None\n", + " if not schema:\n", + " return None, \"❌ Please describe your data schema\", None\n", + " \n", + " # For larger datasets, use batch generation\n", + " if num_rec > 50:\n", + " return generate_batch_data(final_api_key, schema, num_rec, example)\n", + " else:\n", + " return generate_synthetic_data(final_api_key, schema, num_rec, example)\n", + " \n", + " generate_btn.click(\n", + " fn=generate_wrapper,\n", + " inputs=[api_key_input, schema_input, num_records, example_input],\n", 
+ " outputs=[dataframe_output, status_output, csv_output]\n", + " )\n", + " \n", + " gr.Markdown(\"\"\"\n", + " ---\n", + " ### 💡 Tips:\n", + " - Be specific about data types, ranges, and formats\n", + " - Provide examples for better results\n", + " - For large datasets (>50 records), generation happens in batches\n", + " - Claude 3 Haiku is fast and cost-effective for this task\n", + " \n", + " ### 🔑 API Key Setup:\n", + " Create a `.env` file in the same directory with:\n", + " ```\n", + " ANTHROPIC_API_KEY=your_api_key_here\n", + " ```\n", + " \n", + " ### ⚠️ Troubleshooting:\n", + " If you see a \"proxies\" error, pin httpx to a compatible version:\n", + " ```\n", + " pip install httpx==0.27.2\n", + " ```\n", + " \"\"\")\n", + " \n", + " return demo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "cef71337-b446-46b2-b84b-d23b7dd4f13e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7867\n", + "\n", + "To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "demo = create_interface()\n", + "demo.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec34fee8-eeb1-4015-95fe-62276927d25a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week3/community-contributions/Synthetic Dataset Generator/requirements.txt b/week3/community-contributions/Synthetic Dataset Generator/requirements.txt new file mode 100644 index 0000000..5ad09e6 --- /dev/null +++ b/week3/community-contributions/Synthetic Dataset Generator/requirements.txt @@ -0,0 +1,5 @@ +gradio>=4.0.0 +anthropic>=0.25.0 +pandas>=1.5.0 +python-dotenv>=1.0.0 +httpx==0.27.2 \ No newline at end of file From d08b9ecd292d55b317b77984d9a0163bf227dd22 Mon Sep 17 00:00:00 2001 From: Saurabh Gupta Date: Sat, 11 Oct 2025 16:38:37 -0400 Subject: [PATCH 4/5] Week 5, day 4, Website Summarizer using RAG and Vector Store and OpenAI --- .../day4_RAG_website_summarizer.ipynb | 202 ++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 week5/community-contributions/day4_RAG_website_summarizer.ipynb diff --git a/week5/community-contributions/day4_RAG_website_summarizer.ipynb b/week5/community-contributions/day4_RAG_website_summarizer.ipynb new file mode 100644 index 0000000..0dd3902 --- /dev/null +++ b/week5/community-contributions/day4_RAG_website_summarizer.ipynb @@ 
-0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6afa6324", + "metadata": {}, + "source": [ + "Website Summarizer using Langchain RecursiveUrlLoader and OpenAI GPT-4o." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd0aa282", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain-community beautifulsoup4 lxml" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff0ba859", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import glob\n", + "from dotenv import load_dotenv\n", + "import gradio as gr\n", + "\n", + "# imports for langchain\n", + "\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.schema import Document\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_chroma import Chroma\n", + "\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "\n", + "from langchain_community.document_loaders import RecursiveUrlLoader\n", + "import re\n", + "\n", + "from bs4 import BeautifulSoup\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e2be45ee", + "metadata": {}, + "outputs": [], + "source": [ + "MODEL = \"gpt-4o\"\n", + "db_name = \"vector_db\"\n", + "\n", + "\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2cd21d56", + "metadata": {}, + "outputs": [], + "source": [ + "def bs4_extractor(html: str) -> str:\n", + " soup = BeautifulSoup(html, \"lxml\")\n", + " return re.sub(r\"\\n\\n+\", \"\\n\\n\", soup.text).strip()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c07925ce", + "metadata": {}, + "outputs": [], + "source": [ + "def prepareLLM(website_url):\n", + " loader = 
RecursiveUrlLoader(website_url, extractor=bs4_extractor)\n", + " docs = loader.load()\n", + " print(f\"Loaded {len(docs)} documents\")\n", + " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + " chunks = text_splitter.split_documents(docs)\n", + " print(f\"Loaded {len(chunks)} chunks\")\n", + "\n", + " embeddings = OpenAIEmbeddings()\n", + "\n", + " # Delete if already exists\n", + "\n", + " if os.path.exists(db_name):\n", + " Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()\n", + "\n", + " # Create vectorstore\n", + "\n", + " vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)\n", + " print(f\"Vectorstore created with {vectorstore._collection.count()} documents\")\n", + "\n", + " # create a new Chat with OpenAI\n", + " llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n", + "\n", + " # set up the conversation memory for the chat\n", + " memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)\n", + "\n", + " # the retriever is an abstraction over the VectorStore that will be used during RAG\n", + " retriever = vectorstore.as_retriever()\n", + "\n", + " # putting it together: set up the conversation chain with the GPT-4o LLM (see MODEL), the vector store and memory\n", + " conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)\n", + "\n", + " return conversation_chain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8cc26a70", + "metadata": {}, + "outputs": [], + "source": [ + "website_global= None\n", + "conversational_chain_global = None" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "809e7afa", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(website,question):\n", + " global website_global\n", + " global conversational_chain_global\n", + " if website_global != website:\n", + " conversation_chain = prepareLLM(website)\n", + 
" website_global = website\n", + " conversational_chain_global = conversation_chain\n", + " result = conversational_chain_global.invoke({\"question\":question})\n", + " return result['answer']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e1e9c0e9", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " website = gr.Textbox(label=\"Website URL (Only required for the first submit)\")\n", + " question = gr.Textbox(label=\"Your Question\")\n", + " submit = gr.Button(\"Submit\")\n", + " answer = gr.Textbox(label=\"Response\")\n", + " submit.click(fn=chat, inputs=[website,question], outputs=[answer])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80ef8c02", + "metadata": {}, + "outputs": [], + "source": [ + "ui.launch()" + ] + }, + { + "cell_type": "markdown", + "id": "fef26a4b", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 2b2772bbc865820cd6bda411db0c6e86cecf5540 Mon Sep 17 00:00:00 2001 From: Saurabh Gupta Date: Sat, 11 Oct 2025 16:42:07 -0400 Subject: [PATCH 5/5] Added Disclaimer --- .../day5_stock_analysis_recommender.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/week4/community-contributions/day5_stock_analysis_recommender.ipynb b/week4/community-contributions/day5_stock_analysis_recommender.ipynb index b99098a..4ac209f 100644 --- a/week4/community-contributions/day5_stock_analysis_recommender.ipynb +++ b/week4/community-contributions/day5_stock_analysis_recommender.ipynb @@ -5,7 +5,9 @@ "id": "b65e507f", "metadata": {}, "source": [ - "Stock Analysis 
Recommender. This solution can be extended with a real-time API for multiple stock symbols. In this example, analysis for only 1 stock IBM is displayed." + "Stock Analysis Recommender. This solution can be extended with a real-time API for multiple stock symbols. In this example, analysis for only 1 stock IBM is displayed.\n", + "\n", + "Disclaimer: This is just a test project. Please don't use it for any commercial purpose." ] }, {