{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install -q bitsandbytes>=0.43.1 accelerate transformers torch sentencepiece" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "💻 CPU mode - loading without quantization...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2fa644e735144ab0a238f031bf7c6c7a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors.index.json: 0%| | 0.00/23.9k [00:00\n", "Trying alternative loading method...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "17d3da1874734c7fbf542b239f6f5ba0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Fetching 4 files: 0%| | 0/4 [00:00\n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File 
\"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n", "Exception ignored in: \n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/std.py\", line 1148, in __del__\n", " self.close()\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/tqdm/notebook.py\", line 279, in close\n", " self.disp(bar_style='danger', check_delay=False)\n", "AttributeError: 'tqdm' object has no attribute 'disp'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Llama model completely failed: \n", "Will use OpenAI only mode.\n" ] } ], "source": [ "import torch\n", "import pandas as pd\n", "import random\n", "from io import StringIO\n", "from openai import OpenAI\n", "import gradio as gr\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n", "from dotenv import load_dotenv\n", "import os\n", "\n", "load_dotenv(override=True)\n", "openai = OpenAI()\n", "\n", "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "try:\n", " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", " tokenizer.pad_token = tokenizer.eos_token\n", " \n", " if torch.cuda.is_available():\n", " print(\"🚀 CUDA available - loading with quantization...\")\n", " quant_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", " bnb_4bit_quant_type=\"nf4\"\n", " )\n", " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n", " else:\n", " print(\"💻 CPU mode - loading without quantization...\")\n", " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float16)\n", " \n", " print(\"Llama model loaded successfully!\")\n", "except Exception as e:\n", " print(f\"Llama model failed to load: {e}\")\n", " print(\"Trying alternative loading method...\")\n", " try:\n", " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", " tokenizer.pad_token = tokenizer.eos_token\n", " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"cpu\", torch_dtype=torch.float32)\n", " print(\"Llama model loaded in CPU mode!\")\n", " except Exception as e2:\n", " print(f\"Llama model completely failed: {e2}\")\n", " print(\"Will use OpenAI only mode.\")\n", " model = None\n", " tokenizer = None\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def generate_with_openai(dataset_type, num_records, region):\n", " prompts = {\n", " \"employees\": f\"Generate {num_records} synthetic employee records with {region} addresses. Include: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country.\",\n", " \"customers\": f\"Generate {num_records} synthetic customer records with {region} addresses. Include: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date.\",\n", " \"products\": f\"Generate {num_records} synthetic product records. Include: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date.\",\n", " \"transactions\": f\"Generate {num_records} synthetic transaction records. 
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def generate_with_openai(dataset_type, num_records, region):\n", "    prompts = {\n", "        \"employees\": f\"Generate {num_records} synthetic employee records with {region} addresses. Include: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country.\",\n", "        \"customers\": f\"Generate {num_records} synthetic customer records with {region} addresses. Include: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date.\",\n", "        \"products\": f\"Generate {num_records} synthetic product records. Include: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date.\",\n", "        \"transactions\": f\"Generate {num_records} synthetic transaction records. Include: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status.\"\n", "    }\n", "\n", "    response = openai.chat.completions.create(\n", "        model=\"gpt-4o-mini\",\n", "        messages=[\n", "            {\"role\": \"system\", \"content\": \"You are a data generation expert. Create realistic, diverse synthetic data in CSV format.\"},\n", "            {\"role\": \"user\", \"content\": prompts[dataset_type]}\n", "        ]\n", "    )\n", "\n", "    return clean_csv_response(response.choices[0].message.content)\n", "\n", "def generate_with_llama(dataset_type, num_records, region):\n", "    if model is None or tokenizer is None:\n", "        return \"❌ Llama model not available. Please use OpenAI option.\"\n", "\n", "    prompts = {\n", "        \"employees\": f\"Create {num_records} employee records with {region} addresses: employee_id, first_name, last_name, email, phone, department, salary, hire_date, address, city, state, country. Format as CSV.\",\n", "        \"customers\": f\"Create {num_records} customer records with {region} addresses: customer_id, first_name, last_name, email, phone, company, address, city, state, country, registration_date. Format as CSV.\",\n", "        \"products\": f\"Create {num_records} product records: product_id, name, category, price, description, brand, stock_quantity, supplier, created_date. Format as CSV.\",\n", "        \"transactions\": f\"Create {num_records} transaction records: transaction_id, customer_id, product_id, amount, quantity, transaction_date, payment_method, status. Format as CSV.\"\n", "    }\n", "\n", "    try:\n", "        inputs = tokenizer(prompts[dataset_type], return_tensors=\"pt\").to(device)\n", "\n", "        with torch.no_grad():\n", "            outputs = model.generate(\n", "                **inputs,\n", "                max_new_tokens=2048,\n", "                temperature=0.7,\n", "                do_sample=True,\n", "                pad_token_id=tokenizer.eos_token_id\n", "            )\n", "\n", "        # Decode only the newly generated tokens so the prompt text is not returned with the CSV\n", "        new_tokens = outputs[0][inputs[\"input_ids\"].shape[1]:]\n", "        response = tokenizer.decode(new_tokens, skip_special_tokens=True)\n", "        return clean_csv_response(response)\n", "    except Exception as e:\n", "        return f\"❌ Error generating with Llama: {str(e)}\"\n", "\n", "def clean_csv_response(response):\n", "    response = response.strip()\n", "    if \"```\" in response:\n", "        parts = response.split(\"```\")\n", "        if len(parts) > 1:\n", "            response = parts[1]\n", "        # Drop the leading \"csv\" language hint left over from a ```csv fence\n", "        if response.startswith(\"csv\"):\n", "            response = response[3:]\n", "    return response.strip()\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import tempfile\n", "\n", "def generate_dataset(dataset_type, num_records, region, model_choice):\n", "    try:\n", "        if model_choice == \"OpenAI GPT-4o-mini\":\n", "            csv_data = generate_with_openai(dataset_type, num_records, region)\n", "        else:\n", "            csv_data = generate_with_llama(dataset_type, num_records, region)\n", "\n", "        df = pd.read_csv(StringIO(csv_data))\n", "        return df, csv_data, f\"✅ Generated {len(df)} records successfully!\"\n", "    except Exception as e:\n", "        return pd.DataFrame(), \"\", f\"❌ Error: {str(e)}\"\n", "\n", "def download_csv(csv_data):\n", "    # gr.DownloadButton expects a file path rather than raw CSV text, so write the data to a temp file\n", "    if not csv_data:\n", "        return None\n", "    path = os.path.join(tempfile.gettempdir(), \"synthetic_dataset.csv\")\n", "    with open(path, \"w\") as f:\n", "        f.write(csv_data)\n", "    return path\n" ] },
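{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Small sanity check of the helpers above (an added example cell; it assumes the previous cells have run).\n", "# The fenced string below is a made-up model reply, used only to show what clean_csv_response returns.\n", "# The generate_dataset call is left commented out because it calls the OpenAI API and needs OPENAI_API_KEY.\n", "sample_reply = \"```csv\\nid,name\\n1,Alice\\n2,Bob\\n```\"\n", "print(clean_csv_response(sample_reply))\n", "\n", "# Uncomment to generate a small dataset directly, without the Gradio UI:\n", "# df, csv_data, status = generate_dataset(\"products\", 10, \"US Only\", \"OpenAI GPT-4o-mini\")\n", "# print(status)\n", "# df.head()\n" ] },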
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n", " response = await route_utils.call_process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " ...<5 lines>...\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " ...<11 lines>...\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n", " data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n", " await processing_utils.async_move_files_to_cache(\n", " ...<3 lines>...\n", " )\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n", " return await client_utils.async_traverse(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " data, _move_to_cache, client_utils.is_file_obj_with_meta\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n", " return await func(json_obj)\n", " ^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n", " elif utils.is_static_file(payload):\n", " ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n", " return _is_static_file(file_path, _StaticFiles.all_paths)\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n", " if not file_path.exists():\n", " ~~~~~~~~~~~~~~~~^^\n", " File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n", " self.stat(follow_symlinks=follow_symlinks)\n", " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n", " return os.stat(self, follow_symlinks=follow_symlinks)\n", " ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", "OSError: [Errno 63] File name too long: 'csv\\ntransaction_id,customer_id,product_id,amount,quantity,transaction_date,payment_method,status\\n1,CUST001,PROD1001,29.99,1,2023-01-15,Credit Card,Completed\\n2,CUST002,PROD1002,15.49,2,2023-01-18,Debit Card,Completed\\n3,CUST003,PROD1003,65.00,1,2023-02-01,PayPal,Pending\\n4,CUST001,PROD1004,10.99,3,2023-02-10,Credit Card,Completed\\n5,CUST004,PROD1005,45.50,1,2023-02-20,Cash,Completed\\n6,CUST005,PROD1006,89.99,1,2023-03-02,Debit Card,Completed\\n7,CUST002,PROD1007,24.99,2,2023-03-14,Credit Card,Cancelled\\n8,CUST003,PROD1008,12.50,4,2023-03-20,PayPal,Completed\\n9,CUST006,PROD1009,150.00,1,2023-04-01,Credit Card,Completed\\n10,CUST007,PROD1010,30.00,2,2023-04-10,Debit Card,Pending\\n11,CUST008,PROD1011,5.99,10,2023-04-12,Cash,Completed\\n12,CUST001,PROD1012,70.00,1,2023-05-05,Credit 
Card,Completed\\n13,CUST009,PROD1013,100.00,1,2023-05-15,PayPal,Completed\\n14,CUST004,PROD1014,45.00,1,2023-05-25,Credit Card,Returned\\n15,CUST002,PROD1015,7.50,5,2023-06-10,Debit Card,Completed\\n16,CUST005,PROD1016,22.00,3,2023-06-12,Cash,Completed\\n17,CUST006,PROD1017,120.00,1,2023-06-20,Credit Card,Pending\\n18,CUST008,PROD1018,80.00,1,2023-07-01,PayPal,Completed\\n19,CUST007,PROD1019,60.00,2,2023-07-05,Credit Card,Completed\\n20,CUST003,PROD1020,15.00,3,2023-07-15,Debit Card,Completed\\n'\n", "Traceback (most recent call last):\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/queueing.py\", line 759, in process_events\n", " response = await route_utils.call_process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " ...<5 lines>...\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/route_utils.py\", line 354, in call_process_api\n", " output = await app.get_blocks().process_api(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " ...<11 lines>...\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 2127, in process_api\n", " data = await self.postprocess_data(block_fn, result[\"prediction\"], state)\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/blocks.py\", line 1910, in postprocess_data\n", " await processing_utils.async_move_files_to_cache(\n", " ...<3 lines>...\n", " )\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 594, in async_move_files_to_cache\n", " return await client_utils.async_traverse(\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " data, _move_to_cache, client_utils.is_file_obj_with_meta\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " )\n", " ^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio_client/utils.py\", line 1197, in async_traverse\n", " return await func(json_obj)\n", " ^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/processing_utils.py\", line 560, in _move_to_cache\n", " elif utils.is_static_file(payload):\n", " ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1191, in is_static_file\n", " return _is_static_file(file_path, _StaticFiles.all_paths)\n", " File \"/opt/miniconda3/lib/python3.13/site-packages/gradio/utils.py\", line 1204, in _is_static_file\n", " if not file_path.exists():\n", " ~~~~~~~~~~~~~~~~^^\n", " File \"/opt/miniconda3/lib/python3.13/pathlib/_abc.py\", line 450, in exists\n", " self.stat(follow_symlinks=follow_symlinks)\n", " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/opt/miniconda3/lib/python3.13/pathlib/_local.py\", line 515, in stat\n", " return os.stat(self, follow_symlinks=follow_symlinks)\n", " ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", "OSError: [Errno 63] File name too long: 'csv\\nproduct_id,name,category,price,description,brand,stock_quantity,supplier,created_date\\nP001,Wireless Earbuds,Electronics,79.99,\"Noise-cancelling wireless earbuds with touch controls.\",\"SoundWave\",250,\"TechSupply Co.\",2023-08-15\\nP002,Men\\'s Running Shoes,Sportswear,89.99,\"Lightweight and breathable running shoes designed for comfort.\",\"FitRun\",150,\"SportyDeals\",2023-09-05\\nP003,4K Ultra HD TV,Electronics,499.99,\"55-inch 4K Ultra HD Smart LED TV with HDR.\",\"VisionMax\",80,\"HomeTech Distributors\",2023-08-20\\nP004,Coffee Maker,Home 
Appliances,49.99,\"Programmable coffee maker with 12-cup capacity.\",\"BrewMaster\",200,\"Kitchen Supply Inc.\",2023-07-30\\nP005,Water Bottle,Sports Equipment,19.99,\"Insulated stainless steel water bottle, keeps drinks cold for 24 hours.\",\"HydroCool\",500,\"EcoBottles\",2023-09-10\\nP006,Ergonomic Office Chair,Furniture,199.99,\"Comfortable ergonomic chair with lumbar support and adjustable height.\",\"Home Comforts\",75,\"OfficeWorks\",2023-08-28\\nP007,Smart Watch,Electronics,249.99,\"Smart watch with fitness tracking and heart rate monitor.\",\"FitTrack\",120,\"GizmoGadgets\",2023-09-12\\nP008,Yoga Mat,Sports Equipment,29.99,\"Non-slip yoga mat with extra cushioning.\",\"Zen Yoga\",350,\"Wellness Store\",2023-09-15\\nP009,Air Fryer,Home Appliances,89.99,\"Compact air fryer with multiple cooking presets.\",\"CrispyCook\",145,\"KitchenPro\",2023-08-02\\nP010,Wireless Mouse,Electronics,29.99,\"Ergonomic wireless mouse with customizable buttons.\",\"ClickTech\",300,\"Gadget World\",2023-07-25\\nP011,Spice Rack Organization Set,Home Decor,39.99,\"Rotating spice rack with 12 glass jars included.\",\"HomeChef\",210,\"OrganizeIt Co.\",2023-08-17\\nP012,Dumbbell Set,Sports Equipment,99.99,\"Adjustable dumbbell set ranging from 5 to 30 lbs.\",\"StrengthTech\",100,\"Fit Equipment\",2023-09-01\\nP013,Kids\\' Backpack,Accessories,34.99,\"Durable backpack with multiple compartments for school.\",\"KidStyle\",175,\"Backpack Haven\",2023-08-23\\nP014,Digital Camera,Electronics,399.99,\"Compact digital camera with 20 MP and full HD video.\",\"SnapShot\",60,\"Camera Boutique\",2023-09-09\\nP015,Portable Bluetooth Speaker,Electronics,59.99,\"Water-resistant Bluetooth speaker with 12 hours of playtime.\",\"SoundBox\",130,\"Audio Plus\",2023-09-14\\nP016,Electric Toothbrush,Health & Personal Care,59.99,\"Rechargeable electric toothbrush with timer and pressure sensor.\",\"DentalCare\",400,\"HealthFirst Supplies\",2023-08-30\\nP017,Tote Bag,Accessories,24.99,\"Stylish and spacious tote bag for everyday use.\",\"Chic Designs\",300,\"Fashion Hub\",2023-09-06\\nP018,Sneaker Cleaner Kit,Accessories,15.99,\"Complete shoe cleaning kit for all types of sneakers.\",\"FreshFeet\",500,\"CleanKicks\",2023-09-03\\nP019,Camping Tent,Outdoor,129.99,\"Easy setup camping tent for 4 people, weather-resistant.\",\"Outdoors Pro\",85,\"Adventure Outfitters\",2023-08-12\\nP020,LED Desk Lamp,Home Decor,39.99,\"Adjustable LED desk lamp with multiple brightness settings.\",\"BrightEase\",170,\"HomeLight Solutions\",2023-09-08\\n'\n" ] } ], "source": [ "with gr.Blocks(\n", " theme=gr.themes.Soft(\n", " primary_hue=\"blue\",\n", " neutral_hue=\"gray\",\n", " font=[\"Inter\", \"ui-sans-serif\", \"system-ui\"]\n", " ),\n", " css=\"\"\"\n", " .gradio-container { max-width: 1200px !important; margin: auto !important; }\n", " .header { text-align: center; margin-bottom: 2em; }\n", " .header h1 { color: #1f2937; font-size: 2.5em; margin-bottom: 0.5em; }\n", " .header p { color: #6b7280; font-size: 1.1em; }\n", " .generate-btn { background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%) !important; }\n", " .generate-btn:hover { transform: translateY(-2px) !important; box-shadow: 0 8px 25px rgba(59, 130, 246, 0.3) !important; }\n", " .stats-card { background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%); border-radius: 12px; padding: 1.5em; margin: 1em 0; }\n", " \"\"\"\n", ") as demo:\n", " \n", " gr.HTML(\"\"\"\n", "
\n", "

Synthetic Dataset Generator

\n", "

Generate realistic synthetic datasets using AI models for testing and development

\n", "
\n", " \"\"\")\n", " \n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " gr.Markdown(\"### Configuration\")\n", " \n", " dataset_type = gr.Dropdown(\n", " choices=[\"employees\", \"customers\", \"products\", \"transactions\"],\n", " value=\"employees\",\n", " label=\"Dataset Type\",\n", " info=\"Choose the type of data to generate\"\n", " )\n", " \n", " num_records = gr.Slider(\n", " minimum=5, maximum=100, step=5, value=20,\n", " label=\"Number of Records\",\n", " info=\"How many records to generate\"\n", " )\n", " \n", " region = gr.Dropdown(\n", " choices=[\"US Only\", \"International\", \"Mixed\", \"Europe\", \"Asia\"],\n", " value=\"US Only\",\n", " label=\"Geographic Region\",\n", " info=\"Location for addresses and phone numbers\"\n", " )\n", " \n", " model_choice = gr.Radio(\n", " choices=[\"OpenAI GPT-4o-mini\", \"Llama 3.1 8B\"],\n", " value=\"OpenAI GPT-4o-mini\",\n", " label=\"AI Model\",\n", " info=\"Choose the AI model for generation\"\n", " )\n", " \n", " generate_btn = gr.Button(\n", " \"Generate Dataset\",\n", " variant=\"primary\",\n", " elem_classes=\"generate-btn\",\n", " size=\"lg\"\n", " )\n", " \n", " with gr.Column(scale=2):\n", " gr.Markdown(\"### Generated Dataset\")\n", " \n", " status = gr.Markdown(\"Ready to generate your dataset!\")\n", " \n", " dataframe_output = gr.Dataframe(\n", " value=pd.DataFrame(),\n", " label=\"Dataset Preview\",\n", " wrap=True\n", " )\n", " \n", " with gr.Row():\n", " csv_output = gr.Textbox(\n", " value=\"\",\n", " label=\"CSV Data\",\n", " lines=10,\n", " max_lines=15\n", " )\n", " \n", " download_btn = gr.DownloadButton(\n", " \"Download CSV\",\n", " elem_id=\"download-btn\"\n", " )\n", " \n", " generate_btn.click(\n", " generate_dataset,\n", " inputs=[dataset_type, num_records, region, model_choice],\n", " outputs=[dataframe_output, csv_output, status]\n", " )\n", " \n", " csv_output.change(\n", " download_csv,\n", " inputs=[csv_output],\n", " outputs=[download_btn]\n", " )\n", "\n", "demo.launch(share=True, inbrowser=True)\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 2 }