From de28580c722a8827dac9cafebd5244124a7d159f Mon Sep 17 00:00:00 2001 From: Umar Javed Date: Tue, 21 Oct 2025 17:49:37 +0500 Subject: [PATCH] Synthetic data writer --- .../w3day5_synthetic_dataset_generator.ipynb | 540 ++++++++++++++++++ 1 file changed, 540 insertions(+) create mode 100644 week3/community-contributions/w3day5_synthetic_dataset_generator.ipynb diff --git a/week3/community-contributions/w3day5_synthetic_dataset_generator.ipynb b/week3/community-contributions/w3day5_synthetic_dataset_generator.ipynb new file mode 100644 index 0000000..179db82 --- /dev/null +++ b/week3/community-contributions/w3day5_synthetic_dataset_generator.ipynb @@ -0,0 +1,540 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -q bitsandbytes>=0.43.1 accelerate transformers torch sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "💻 CPU mode - loading without quantization...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2fa644e735144ab0a238f031bf7c6c7a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00\n", + "Trying alternative loading method...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "17d3da1874734c7fbf542b239f6f5ba0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 4 files: 0%| | 0/4 [00:00\n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File 
\"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", + " self.close()\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", + " self.disp(bar_style='danger', check_delay=False)\n", + "AttributeError: 'tqdm' object has no attribute 'disp'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Llama model completely failed: \n", + "Will use OpenAI only mode.\n" + ] + } + ], + "source": [ + "import torch\n", + "import pandas as pd\n", + "import random\n", + "from io import StringIO\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n", + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv(override=True)\n", + "openai = OpenAI()\n", + "\n", + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "try:\n", + " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " \n", + " if torch.cuda.is_available():\n", + " print(\"🚀 CUDA available - loading with quantization...\")\n", + " quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + " )\n", + " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n", + " else:\n", + " print(\"💻 CPU mode - loading without quantization...\")\n", + " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float16)\n", + " \n", + " print(\"Llama model loaded successfully!\")\n", + "except Exception as e:\n", + " print(f\"Llama model failed to load: {e}\")\n", + " print(\"Trying alternative loading method...\")\n", + " try:\n", + " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float32)\n", + " print(\"Llama model loaded in CPU mode!\")\n", + " except Exception as e2:\n", + " print(f\"Llama model completely failed: {e2}\")\n", + " print(\"Will use OpenAI only mode.\")\n", + " model = None\n", + " tokenizer = None\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_with_openai(dataset_type, num_records, region):\n", + " prompts = {\n", + " \"employees\": f\"Generate {num_records} synthetic employee records with {region} addresses. Include: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country.\",\n", + " \"customers\": f\"Generate {num_records} synthetic customer records with {region} addresses. Include: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date.\",\n", + " \"products\": f\"Generate {num_records} synthetic product records. Include: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date.\",\n", + " \"transactions\": f\"Generate {num_records} synthetic transaction records. Include: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status.\"\n", + " }\n", + " \n", + " response = openai.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a data generation expert. Create realistic, diverse synthetic data in CSV format.\"},\n", + " {\"role\": \"user\", \"content\": prompts[dataset_type]}\n", + " ]\n", + " )\n", + " \n", + " return clean_csv_response(response.choices[0].message.content)\n", + "\n", + "def generate_with_llama(dataset_type, num_records, region):\n", + " if model is None or tokenizer is None:\n", + " return \"❌ Llama model not available. Please use OpenAI option.\"\n", + " \n", + " prompts = {\n", + " \"employees\": f\"Create {num_records} employee records with {region} addresses: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country. Format as CSV.\",\n", + " \"customers\": f\"Create {num_records} customer records with {region} addresses: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date. Format as CSV.\",\n", + " \"products\": f\"Create {num_records} product records: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date. Format as CSV.\",\n", + " \"transactions\": f\"Create {num_records} transaction records: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status. 
Format as CSV.\"\n",
+ "    }\n",
+ "    \n",
+ "    try:\n",
+ "        # Tokenize the prompt and generate; slice off the prompt tokens so only newly generated text is decoded\n",
+ "        inputs = tokenizer(prompts[dataset_type], return_tensors=\"pt\").to(device)\n",
+ "        \n",
+ "        with torch.no_grad():\n",
+ "            outputs = model.generate(\n",
+ "                **inputs,\n",
+ "                max_new_tokens=2048,\n",
+ "                temperature=0.7,\n",
+ "                do_sample=True,\n",
+ "                pad_token_id=tokenizer.eos_token_id\n",
+ "            )\n",
+ "        \n",
+ "        generated_tokens = outputs[0][inputs[\"input_ids\"].shape[1]:]\n",
+ "        response = tokenizer.decode(generated_tokens, skip_special_tokens=True)\n",
+ "        return clean_csv_response(response)\n",
+ "    except Exception as e:\n",
+ "        return f\"❌ Error generating with Llama: {str(e)}\"\n",
+ "\n",
+ "def clean_csv_response(response):\n",
+ "    # Strip markdown code fences and any leading language tag (e.g. ```csv ... ```)\n",
+ "    response = response.strip()\n",
+ "    if \"```\" in response:\n",
+ "        parts = response.split(\"```\")\n",
+ "        if len(parts) > 1:\n",
+ "            response = parts[1]\n",
+ "    response = response.strip()\n",
+ "    if response.lower().startswith(\"csv\"):\n",
+ "        response = response[3:].lstrip()\n",
+ "    return response\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tempfile\n",
+ "\n",
+ "def generate_dataset(dataset_type, num_records, region, model_choice):\n",
+ "    try:\n",
+ "        if model_choice == \"OpenAI GPT-4o-mini\":\n",
+ "            csv_data = generate_with_openai(dataset_type, num_records, region)\n",
+ "        else:\n",
+ "            csv_data = generate_with_llama(dataset_type, num_records, region)\n",
+ "        \n",
+ "        df = pd.read_csv(StringIO(csv_data))\n",
+ "        return df, csv_data, f\"✅ Generated {len(df)} records successfully!\"\n",
+ "    except Exception as e:\n",
+ "        return pd.DataFrame(), \"\", f\"❌ Error: {str(e)}\"\n",
+ "\n",
+ "def download_csv(csv_data):\n",
+ "    # gr.DownloadButton expects a file path, not raw CSV text, so write the data to a temp file\n",
+ "    if not csv_data:\n",
+ "        return None\n",
+ "    with tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".csv\", delete=False, encoding=\"utf-8\", newline=\"\") as f:\n",
+ "        f.write(csv_data)\n",
+ "        return f.name\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* Running on local URL: http://127.0.0.1:7863\n",
+ "* Running on public URL: https://aaf0c65f7daaafbd21.gradio.live\n",
+ "\n",
+ "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n", + " response = await route_utils.call_process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<5 lines>...\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n", + " output = await app.get_blocks().process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<11 lines>...\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n", + " data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n", + " await processing_utils.async_move_files_to_cache(\n", + " ...<3 lines>...\n", + " )\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n", + " return await client_utils.async_traverse(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " data, _move_to_cache, client_utils.is_file_obj_with_meta\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n", + " return await func(json_obj)\n", + " ^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n", + " elif utils.is_static_file(payload):\n", + " ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n", + " return _is_static_file(file_path, _StaticFiles.all_paths)\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n", + " if not file_path.exists():\n", + " ~~~~~~~~~~~~~~~~^^\n", + " File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n", + " self.stat(follow_symlinks=follow_symlinks)\n", + " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n", + " return os.stat(self, follow_symlinks=follow_symlinks)\n", + " ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "OSError: [Errno 63] File name too long: 'csv\\ntransaction_id,customer_id,product_id,amount,quantity,transaction_date,payment_method,status\\n1,CUST001,PROD1001,29.99,1,2023-01-15,Credit Card,Completed\\n2,CUST002,PROD1002,15.49,2,2023-01-18,Debit Card,Completed\\n3,CUST003,PROD1003,65.00,1,2023-02-01,PayPal,Pending\\n4,CUST001,PROD1004,10.99,3,2023-02-10,Credit Card,Completed\\n5,CUST004,PROD1005,45.50,1,2023-02-20,Cash,Completed\\n6,CUST005,PROD1006,89.99,1,2023-03-02,Debit Card,Completed\\n7,CUST002,PROD1007,24.99,2,2023-03-14,Credit Card,Cancelled\\n8,CUST003,PROD1008,12.50,4,2023-03-20,PayPal,Completed\\n9,CUST006,PROD1009,150.00,1,2023-04-01,Credit Card,Completed\\n10,CUST007,PROD1010,30.00,2,2023-04-10,Debit 
Card,Pending\\n11,CUST008,PROD1011,5.99,10,2023-04-12,Cash,Completed\\n12,CUST001,PROD1012,70.00,1,2023-05-05,Credit Card,Completed\\n13,CUST009,PROD1013,100.00,1,2023-05-15,PayPal,Completed\\n14,CUST004,PROD1014,45.00,1,2023-05-25,Credit Card,Returned\\n15,CUST002,PROD1015,7.50,5,2023-06-10,Debit Card,Completed\\n16,CUST005,PROD1016,22.00,3,2023-06-12,Cash,Completed\\n17,CUST006,PROD1017,120.00,1,2023-06-20,Credit Card,Pending\\n18,CUST008,PROD1018,80.00,1,2023-07-01,PayPal,Completed\\n19,CUST007,PROD1019,60.00,2,2023-07-05,Credit Card,Completed\\n20,CUST003,PROD1020,15.00,3,2023-07-15,Debit Card,Completed\\n'\n", + "Traceback (most recent call last):\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n", + " response = await route_utils.call_process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<5 lines>...\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n", + " output = await app.get_blocks().process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<11 lines>...\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n", + " data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n", + " await processing_utils.async_move_files_to_cache(\n", + " ...<3 lines>...\n", + " )\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n", + " return await client_utils.async_traverse(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " data, _move_to_cache, client_utils.is_file_obj_with_meta\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n", + " return await func(json_obj)\n", + " ^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n", + " elif utils.is_static_file(payload):\n", + " ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n", + " return _is_static_file(file_path, _StaticFiles.all_paths)\n", + " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n", + " if not file_path.exists():\n", + " ~~~~~~~~~~~~~~~~^^\n", + " File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n", + " self.stat(follow_symlinks=follow_symlinks)\n", + " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n", + " return os.stat(self, follow_symlinks=follow_symlinks)\n", + " ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "OSError: [Errno 63] File name too long: 'csv\\nproduct_id,name,category,price,description,brand,stock_quantity,supplier,created_date\\nP001,Wireless Earbuds,Electronics,79.99,\"Noise-cancelling wireless earbuds with touch controls.\",\"SoundWave\",250,\"TechSupply Co.\",2023-08-15\\nP002,Men\\'s Running Shoes,Sportswear,89.99,\"Lightweight and breathable running shoes designed for 
comfort.\",\"FitRun\",150,\"SportyDeals\",2023-09-05\\nP003,4K Ultra HD TV,Electronics,499.99,\"55-inch 4K Ultra HD Smart LED TV with HDR.\",\"VisionMax\",80,\"HomeTech Distributors\",2023-08-20\\nP004,Coffee Maker,Home Appliances,49.99,\"Programmable coffee maker with 12-cup capacity.\",\"BrewMaster\",200,\"Kitchen Supply Inc.\",2023-07-30\\nP005,Water Bottle,Sports Equipment,19.99,\"Insulated stainless steel water bottle, keeps drinks cold for 24 hours.\",\"HydroCool\",500,\"EcoBottles\",2023-09-10\\nP006,Ergonomic Office Chair,Furniture,199.99,\"Comfortable ergonomic chair with lumbar support and adjustable height.\",\"Home Comforts\",75,\"OfficeWorks\",2023-08-28\\nP007,Smart Watch,Electronics,249.99,\"Smart watch with fitness tracking and heart rate monitor.\",\"FitTrack\",120,\"GizmoGadgets\",2023-09-12\\nP008,Yoga Mat,Sports Equipment,29.99,\"Non-slip yoga mat with extra cushioning.\",\"Zen Yoga\",350,\"Wellness Store\",2023-09-15\\nP009,Air Fryer,Home Appliances,89.99,\"Compact air fryer with multiple cooking presets.\",\"CrispyCook\",145,\"KitchenPro\",2023-08-02\\nP010,Wireless Mouse,Electronics,29.99,\"Ergonomic wireless mouse with customizable buttons.\",\"ClickTech\",300,\"Gadget World\",2023-07-25\\nP011,Spice Rack Organization Set,Home Decor,39.99,\"Rotating spice rack with 12 glass jars included.\",\"HomeChef\",210,\"OrganizeIt Co.\",2023-08-17\\nP012,Dumbbell Set,Sports Equipment,99.99,\"Adjustable dumbbell set ranging from 5 to 30 lbs.\",\"StrengthTech\",100,\"Fit Equipment\",2023-09-01\\nP013,Kids\\' Backpack,Accessories,34.99,\"Durable backpack with multiple compartments for school.\",\"KidStyle\",175,\"Backpack Haven\",2023-08-23\\nP014,Digital Camera,Electronics,399.99,\"Compact digital camera with 20 MP and full HD video.\",\"SnapShot\",60,\"Camera Boutique\",2023-09-09\\nP015,Portable Bluetooth Speaker,Electronics,59.99,\"Water-resistant Bluetooth speaker with 12 hours of playtime.\",\"SoundBox\",130,\"Audio Plus\",2023-09-14\\nP016,Electric Toothbrush,Health & Personal Care,59.99,\"Rechargeable electric toothbrush with timer and pressure sensor.\",\"DentalCare\",400,\"HealthFirst Supplies\",2023-08-30\\nP017,Tote Bag,Accessories,24.99,\"Stylish and spacious tote bag for everyday use.\",\"Chic Designs\",300,\"Fashion Hub\",2023-09-06\\nP018,Sneaker Cleaner Kit,Accessories,15.99,\"Complete shoe cleaning kit for all types of sneakers.\",\"FreshFeet\",500,\"CleanKicks\",2023-09-03\\nP019,Camping Tent,Outdoor,129.99,\"Easy setup camping tent for 4 people, weather-resistant.\",\"Outdoors Pro\",85,\"Adventure Outfitters\",2023-08-12\\nP020,LED Desk Lamp,Home Decor,39.99,\"Adjustable LED desk lamp with multiple brightness settings.\",\"BrightEase\",170,\"HomeLight Solutions\",2023-09-08\\n'\n" + ] + } + ], + "source": [ + "with gr.Blocks(\n", + " theme=gr.themes.Soft(\n", + " primary_hue=\"blue\",\n", + " neutral_hue=\"gray\",\n", + " font=[\"Inter\", \"ui-sans-serif\", \"system-ui\"]\n", + " ),\n", + " css=\"\"\"\n", + " .gradio-container { max-width: 1200px !important; margin: auto !important; }\n", + " .header { text-align: center; margin-bottom: 2em; }\n", + " .header h1 { color: #1f2937; font-size: 2.5em; margin-bottom: 0.5em; }\n", + " .header p { color: #6b7280; font-size: 1.1em; }\n", + " .generate-btn { background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%) !important; }\n", + " .generate-btn:hover { transform: translateY(-2px) !important; box-shadow: 0 8px 25px rgba(59, 130, 246, 0.3) !important; }\n", + " .stats-card { background: linear-gradient(135deg, 
#f8fafc 0%, #e2e8f0 100%); border-radius: 12px; padding: 1.5em; margin: 1em 0; }\n", + " \"\"\"\n", + ") as demo:\n", + " \n", + " gr.HTML(\"\"\"\n", + "
\n", + "

Synthetic Dataset Generator

\n", + "

Generate realistic synthetic datasets using AI models for testing and development

\n", + "
\n", + " \"\"\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column(scale=1):\n", + " gr.Markdown(\"### Configuration\")\n", + " \n", + " dataset_type = gr.Dropdown(\n", + " choices=[\"employees\", \"customers\", \"products\", \"transactions\"],\n", + " value=\"employees\",\n", + " label=\"Dataset Type\",\n", + " info=\"Choose the type of data to generate\"\n", + " )\n", + " \n", + " num_records = gr.Slider(\n", + " minimum=5, maximum=100, step=5, value=20,\n", + " label=\"Number of Records\",\n", + " info=\"How many records to generate\"\n", + " )\n", + " \n", + " region = gr.Dropdown(\n", + " choices=[\"US Only\", \"International\", \"Mixed\", \"Europe\", \"Asia\"],\n", + " value=\"US Only\",\n", + " label=\"Geographic Region\",\n", + " info=\"Location for addresses and phone numbers\"\n", + " )\n", + " \n", + " model_choice = gr.Radio(\n", + " choices=[\"OpenAI GPT-4o-mini\", \"Llama 3.1 8B\"],\n", + " value=\"OpenAI GPT-4o-mini\",\n", + " label=\"AI Model\",\n", + " info=\"Choose the AI model for generation\"\n", + " )\n", + " \n", + " generate_btn = gr.Button(\n", + " \"Generate Dataset\",\n", + " variant=\"primary\",\n", + " elem_classes=\"generate-btn\",\n", + " size=\"lg\"\n", + " )\n", + " \n", + " with gr.Column(scale=2):\n", + " gr.Markdown(\"### Generated Dataset\")\n", + " \n", + " status = gr.Markdown(\"Ready to generate your dataset!\")\n", + " \n", + " dataframe_output = gr.Dataframe(\n", + " value=pd.DataFrame(),\n", + " label=\"Dataset Preview\",\n", + " wrap=True\n", + " )\n", + " \n", + " with gr.Row():\n", + " csv_output = gr.Textbox(\n", + " value=\"\",\n", + " label=\"CSV Data\",\n", + " lines=10,\n", + " max_lines=15\n", + " )\n", + " \n", + " download_btn = gr.DownloadButton(\n", + " \"Download CSV\",\n", + " elem_id=\"download-btn\"\n", + " )\n", + " \n", + " generate_btn.click(\n", + " generate_dataset,\n", + " inputs=[dataset_type, num_records, region, model_choice],\n", + " outputs=[dataframe_output, csv_output, status]\n", + " )\n", + " \n", + " csv_output.change(\n", + " download_csv,\n", + " inputs=[csv_output],\n", + " outputs=[download_btn]\n", + " )\n", + "\n", + "demo.launch(share=True, inbrowser=True)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}