From a4db90be83b516b4fa5860eb5e1c05905e169086 Mon Sep 17 00:00:00 2001 From: SABEEH Shaikh Date: Tue, 20 May 2025 22:16:19 +0200 Subject: [PATCH] Added my dataset generator to contributions folder --- .../llm_dataset_generator.ipynb | 1801 +++++++++++++++++ 1 file changed, 1801 insertions(+) create mode 100644 week3/community-contributions/llm_dataset_generator.ipynb diff --git a/week3/community-contributions/llm_dataset_generator.ipynb b/week3/community-contributions/llm_dataset_generator.ipynb new file mode 100644 index 0000000..3de4ce1 --- /dev/null +++ b/week3/community-contributions/llm_dataset_generator.ipynb @@ -0,0 +1,1801 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Synthetic Data Generator Notebook\n", + "## About\n", + "This colab notebook demonstrates the use of Frontier and Open-source LLM models for generating synthetic dataset for a business scenario provided by the user. From a UI interface implemented in gradio, a user can define their business scenario in detail, select the number of records needed along with the its format and adjust the number of max output tokens to be generated by the chosen LLM.\n", + "\n", + "It does not stop here. Once the records have been produced in the LLM output, it can be extracted and stored in a file, format same as set by user before. The file is stored in colab notebook under the contents directory. All of this is extraction is done with the help of the 're' library. My first time using it and I totally enjoyed learning it.\n", + "\n", + "## Outlook\n", + "Sometimes the response is loaded with the user prompt and a lot of tags when using an open-source models, such as Mixtral from Mistral. This is because of the prompt format being used. The 'assistant' 'role' format does not suit them. This is an optimization to look for and can be easily done by using custom prompt template for such models and these templates are hinted on their huggingface repo." + ], + "metadata": { + "id": "SFA6R-4jL7SS" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ip4I4Lff3B2M" + }, + "source": [ + "## Install & Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8zVlW-GMcBaU", + "outputId": "0c473564-fb93-41a9-c819-e6aa2382d75a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 MB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.1/323.1 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.0/264.0 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m95.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m78.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m83.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.1/76.1 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m95.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -q gradio anthropic requests torch bitsandbytes transformers accelerate openai" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "YKVNzE5sFH2l" + }, + "outputs": [], + "source": [ + "# imports\n", + "import re\n", + "import os\n", + "import sys\n", + "import gc\n", + "import io\n", + "import json\n", + "import anthropic\n", + "import gradio as gr\n", + "import requests\n", + "import subprocess\n", + "import google.generativeai as ggai\n", + "import torch\n", + "import tempfile\n", + "import shutil\n", + "from io import StringIO\n", + "import pandas as pd\n", + "from google.colab import userdata\n", + "from huggingface_hub import login\n", + "from openai import OpenAI\n", + "from pathlib import Path\n", + "from datetime import datetime\n", + "from IPython.display import Markdown, display, update_display\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LWpD6bZv3mAR" + }, + "source": [ + "## HuggingFace Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "aeC2oWY2FTv7" + }, + "outputs": [], + "source": [ + "# Sign in to HuggingFace Hub\n", + "\n", + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8Au2UPVy3vn5" + }, + "source": [ + "## Frontier Models configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "geBBsd14X3UL" + }, + "outputs": [], + "source": [ + "openai_client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))\n", + "anthropic_client = anthropic.Anthropic(api_key=userdata.get('ANTHROPIC_API_KEY'))\n", + "ggai.configure(api_key=userdata.get('GOOGLE_API_KEY'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tCnDIOlKgjbO" + }, + "source": [ + "## Defining Prompts" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "gkwXZsxofAU1" + }, + "outputs": [], + "source": [ + "system_prompt = \"\"\"\n", + "You are a synthetic dataset generator. Your role is to create synthetic dataset that infers structured data schemas from business scenarios given by the user.\n", + "\n", + "Your task is to:\n", + "1. Understand the user's business problem(s) or use case(s).\n", + "2. Identify the key fields needed to support that scenario.\n", + "3. Define appropriate field names, data types, and formats.\n", + "4. Generate synthetic records that match the inferred schema.\n", + "\n", + "Guidelines:\n", + "- Use realistic field names and values. Do not invent unrelated fields or values.\n", + "- Choose sensible data types: string, integer, float, date, boolean, enum, etc.\n", + "- Respect logical constraints (e.g., age range, date ranges, email formats).\n", + "- Output the dataset in the format the user requests (json, csv, txt, markdown table).\n", + "- If the scenario is vague or broad, make reasonable assumptions and explain them briefly before generating the dataset.\n", + "- Always generate a dataset that supports the business use case logically.\n", + "\n", + "Before generating the data, display the inferred schema in a readable format.\n", + "\"\"\"\n", + "\n", + "# trial_user_prompt = \"I’m building a churn prediction model for a telecom company. Can you generate a synthetic dataset with 100 rows?\"\n", + "def get_user_prompt(business_problem, no_of_samples, file_format):\n", + " return f\"\"\"\n", + " The business scenario for which I want you to generate a dataset is defined below:\n", + " {business_problem}\n", + "\n", + " Generate a synthetic dataset of {no_of_samples} records in {file_format} format.\n", + " When generating the dataset, wrap it between the '<<<>>>' tag. Make sure the tag is there in the output.\n", + " Do not include any other special characters in between the tags, other than the ones required in producing the correct format of data.\n", + " For examples: When a 'csv' format is given, only the ',' character can be used in between the tags.\n", + " \"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yNpVf9-oQdoO" + }, + "source": [ + "### Quanitzation Config" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "3ErZ315MQdU3" + }, + "outputs": [], + "source": [ + "# This allows us to load the model into memory and use less memory\n", + "def get_quantization_config():\n", + " return BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "clGtRh0N4951" + }, + "source": [ + "## HF Model inference" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "MAhyn1ehb3Dh" + }, + "outputs": [], + "source": [ + "# All in one HuggingFace Model Response function\n", + "def run_hfmodel_and_get_response(prompt, model_name, output_tokens):\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " inputs = tokenizer.apply_chat_template(prompt, return_tensors=\"pt\")\n", + " if torch.cuda.is_available():\n", + " inputs = inputs.to(\"cuda\")\n", + " streamer = TextStreamer(tokenizer)\n", + " if \"microsoft/bitnet-b1.58-2B-4T\" in model_name:\n", + " model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"auto\", trust_remote_code=True)\n", + " elif \"tiiuae/Falcon-E-3B-Instruct\" in model_name:\n", + " model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"auto\", torch_dtype=torch.float16 )\n", + " else:\n", + " model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"auto\", quantization_config=get_quantization_config())\n", + " outputs = model.generate(inputs, max_new_tokens=output_tokens, streamer=streamer)\n", + " response = tokenizer.decode(outputs[0])\n", + " del model, inputs, tokenizer, outputs\n", + " gc.collect()\n", + " torch.cuda.empty_cache()\n", + " return response" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gh_Ny1aM-L8z" + }, + "source": [ + "## Frontier Models Inference" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "h11WlZNhfHCR" + }, + "outputs": [], + "source": [ + "# ChatGPT, Claude and Gemini response function\n", + "def get_chatgpt_response(prompt, model_name, output_tokens):\n", + " response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=prompt,\n", + " max_tokens=output_tokens,\n", + " )\n", + " return response.choices[0].message.content\n", + "\n", + "def get_claude_response(prompt, model_name, output_tokens):\n", + " response = anthropic_client.messages.create(\n", + " model=model_name,\n", + " max_tokens=output_tokens,\n", + " system=system_prompt,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " )\n", + " return response.content[0].text\n", + "\n", + "def get_gemini_response(prompt, model_name, output_tokens):\n", + " model = ggai.GenerativeModel(\n", + " model_name=model_name,\n", + " system_instruction=system_prompt,\n", + " )\n", + "\n", + " response = model.generate_content(prompt, generation_config={\n", + " \"max_output_tokens\": output_tokens,\n", + " \"temperature\": 0.7,\n", + " })\n", + " return response.text" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nzHbM_WQvRgT" + }, + "source": [ + "## Gradio Implementation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uFWZqw1R-al_" + }, + "source": [ + "### Dropdowns Selection Lists" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "rOzEb0o--aD7" + }, + "outputs": [], + "source": [ + "# Dropdown List Values for the user\n", + "MODEL_TYPES=[\"GPT\", \"Claude\", \"Gemini\", \"HuggingFace\"]\n", + "OPENAI_MODEL_NAMES=[\"gpt-4o-mini\", \"gpt-4o\", \"gpt-3.5-turbo\"]\n", + "ANTHROPIC_MODELS=[\"claude-3-7-sonnet-latest\", \"claude-3-5-haiku-latest\", \"claude-3-opus-latest\"]\n", + "GOOGLE_MODELS=[\"gemini-2.0-flash\", \"gemini-1.5-pro\"]\n", + "HUGGINGFACE_MODELS=[\n", + " \"meta-llama/Llama-3.2-3B-Instruct\",\n", + " \"microsoft/bitnet-b1.58-2B-4T\",\n", + " \"ByteDance-Seed/Seed-Coder-8B-Instruct\",\n", + " \"tiiuae/Falcon-E-3B-Instruct\",\n", + " \"Qwen/Qwen2.5-7B-Instruct\"\n", + "]\n", + "MODEL_NAMES = {\n", + " \"GPT\": OPENAI_MODEL_NAMES,\n", + " \"Claude\": ANTHROPIC_MODELS,\n", + " \"Gemini\": GOOGLE_MODELS,\n", + " \"HuggingFace\": HUGGINGFACE_MODELS\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sbXGL8_4-oKc" + }, + "source": [ + "### UI" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "_0NCY7FgCVHj" + }, + "outputs": [], + "source": [ + "with gr.Blocks() as generator_ui:\n", + " gr.Markdown(\"# 🧠 Business Scenario → Synthetic Dataset Generator\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=3):\n", + " with gr.Row():\n", + " dataset_size=gr.Number(value=10, label=\"Enter the number of data samples to generate.\", show_label=True)\n", + " format=gr.Dropdown([\"json\", \"csv\", \"txt\", \"markdown\"], label=\"Select the format for the dataset\", show_label=True)\n", + " with gr.Row():\n", + " scenario=gr.Textbox(label=\"Business Scenario\", lines=5, placeholder=\"Describe your business scenario here\")\n", + " with gr.Row():\n", + " error = gr.Markdown(visible=False)\n", + " with gr.Row():\n", + " clear = gr.Button(\"Clear Everything\")\n", + " submit = gr.Button(\"Generate Dataset\", variant=\"primary\")\n", + "\n", + " with gr.Column(scale=1):\n", + " model_type = gr.Dropdown(MODEL_TYPES, label=\"Model Type\", show_label=True, info=\"Select the model type you want to use\")\n", + " model_name = gr.Dropdown(MODEL_NAMES[model_type.value], label=\"Model Name\", show_label=True, allow_custom_value=True, info=\"Select the model name or enter one manually\")\n", + " output_tokens= gr.Number(value=1000, label=\"Enter the max number of output tokens to generate.\", show_label=True, info=\"This will impact the length of the response containg the dataset\")\n", + "\n", + " with gr.Row():\n", + " # Chatbot Interface\n", + " chatbot = gr.Chatbot(\n", + " type='messages',\n", + " label='Chatbot',\n", + " show_label=True,\n", + " height=300,\n", + " resizable=True,\n", + " elem_id=\"chatbot\",\n", + " avatar_images=(\"🧑\", \"🤖\",)\n", + " )\n", + " with gr.Row(variant=\"compact\"):\n", + " extract_btn = gr.Button(\"Extract and Save Dataset\", variant=\"huggingface\", visible=False)\n", + " file_name = gr.Textbox(label=\"Enter file name here (without file extension)\", placeholder=\"e.g. cancer_synthetic, warehouse_synthetic (no digits)\", visible=False)\n", + " with gr.Row():\n", + " markdown_preview = gr.Markdown(visible = False)\n", + " dataset_preview = gr.Textbox(label=\"Dataset Preview\",visible=False)\n", + " with gr.Row():\n", + " file_saved = gr.Textbox(visible=False)\n", + "\n", + " def run_inference(scenario, model_type, model_name, output_tokens, dataset_size, format):\n", + " \"\"\"Run the model and get the response\"\"\"\n", + " model_type=model_type.lower()\n", + " print(f\"scenario: {scenario}\")\n", + " print(f\"model_type: {model_type}\")\n", + " print(f\"model_name: {model_name}\")\n", + " if not scenario.strip():\n", + " return gr.update(value=\"❌ **Error:** Please define a scenario first!\",visible=True), []\n", + "\n", + " user_prompt = get_user_prompt(scenario, dataset_size, format)\n", + " prompt = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " ]\n", + "\n", + " if model_type == \"gpt\":\n", + " response = get_chatgpt_response(prompt=prompt, model_name=model_name, output_tokens=output_tokens)\n", + " elif model_type == \"claude\":\n", + " response = get_claude_response(prompt=user_prompt, model_name=model_name, output_tokens=output_tokens)\n", + " elif model_type == \"gemini\":\n", + " response = get_gemini_response(prompt=user_prompt, model_name=model_name, output_tokens=output_tokens)\n", + " else:\n", + " response = run_hfmodel_and_get_response(prompt=prompt, model_name=model_name, output_tokens=output_tokens)\n", + " torch.cuda.empty_cache()\n", + " history = [\n", + " {\"role\": \"user\", \"content\": scenario},\n", + " {\"role\": \"assistant\", \"content\": response}\n", + " ]\n", + " return gr.update(visible=False), history\n", + "\n", + " def extract_dataset_string(response):\n", + " \"\"\"Extract dataset content between defined tags using regex.\"\"\"\n", + " # Remove known artificial tokens (common in HuggingFace or Claude)\n", + " response = re.sub(r\"<\\[.*?\\]>\", \"\", response)\n", + "\n", + " # Remove system or prompt echo if repeated before dataset\n", + " response = re.sub(r\"(?is)^.*?<<<\", \"<<<\", response.strip(), count=1)\n", + "\n", + " # 1. Match strict <<<>>>...<<<>>> tag blocks (use last match)\n", + " matches = re.findall(r\"<<<>>>[\\s\\r\\n]*(.*?)[\\s\\r\\n]*<<<>>>\", response, re.DOTALL)\n", + " if matches:\n", + " return matches[-1].strip()\n", + "\n", + " # 2. Match loose <<< ... >>> format\n", + " matches = re.findall(r\"<<<[\\s\\r\\n]*(.*?)[\\s\\r\\n]*>>>\", response, re.DOTALL)\n", + " if matches:\n", + " return matches[-1].strip()\n", + "\n", + " # 3. Match final fallback: take everything after last <<< as raw data\n", + " last_open = response.rfind(\"<<<\")\n", + " if last_open != -1:\n", + " raw = response[last_open + 3 :].strip()\n", + " # Optionally cut off noisy trailing notes, explanations, etc.\n", + " raw = re.split(r\"\\n\\s*\\n|Explanation:|Note:|---\", raw)[0]\n", + " return raw.strip()\n", + "\n", + " return \"Could not extract dataset! Try again with a different model.\"\n", + "\n", + " def extract_dataset_from_response(chatbot_history, file_name, file_type):\n", + " \"\"\"Extract dataset and update in gradio UI components\"\"\"\n", + " response = chatbot_history[-1][\"content\"]\n", + " if not response:\n", + " return gr.update(visible=True, value=\"Could not find LLM Response! Try again.\"), gr.update(visible=False)\n", + "\n", + " # match = re.search(r'<<<\\s*(.*?)\\s*>>>', response, re.DOTALL)\n", + " # print(match)\n", + " # if match and match.group(1).strip() == \"\":\n", + " # match = re.search(r'<<<>>>\\s*(.*?)\\s*<<<>>>', response, re.DOTALL)\n", + " # print(match)\n", + " # if match is None:\n", + " # return gr.update(visible=True, value=\"Could not extract dataset! Try again with a different model.\"), gr.update(visible=False)\n", + " # dataset = match.group(1).strip()\n", + " dataset = extract_dataset_string(response)\n", + " if dataset == \"Could not extract dataset! Try again with a different model.\":\n", + " return gr.update(visible=True, value=dataset), gr.update(visible=False)\n", + " text = save_dataset(dataset, file_type, file_name)\n", + " return gr.update(visible=True, value=text), gr.update(visible=True, value=dataset)\n", + "\n", + " def save_dataset(dataset, file_format, file_name):\n", + " \"\"\"Save dataset to a file based on the selected format.\"\"\"\n", + " file_name=file_name+\".\"+file_format\n", + " print(dataset)\n", + " print(file_name)\n", + " if file_format == \"json\":\n", + " try:\n", + " data = json.loads(dataset)\n", + " with open(file_name, \"w\", encoding=\"utf-8\") as f:\n", + " json.dump(data, f, indent=4)\n", + " return \"Dataset saved successfully!\"\n", + " except:\n", + " return \"Could not save dataset! Try again in another format.\"\n", + " elif file_format == \"csv\":\n", + " try:\n", + " df = pd.read_csv(StringIO(dataset))\n", + " df.to_csv(file_name, index=False)\n", + " return \"Dataset saved successfully!\"\n", + " except:\n", + " return \"Could not save dataset! Try again in another format.\"\n", + " elif file_format == \"txt\":\n", + " try:\n", + " with open(file_name, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(dataset)\n", + " return \"Dataset saved successfully!\"\n", + " except:\n", + " return \"Could not save dataset! Try again in another format.\"\n", + "\n", + " def clear_chat():\n", + " \"\"\"Clear the chat history.\"\"\"\n", + " return \"\", [], gr.update(visible=False), gr.update(visible=False)\n", + "\n", + " def show_extract_btn(chatbot_history, format):\n", + " \"\"\"Show the extract button if the response has been displayed in the chatbot and format is not set to markdown\"\"\"\n", + " if chatbot_history == []:\n", + " return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)\n", + " if format == \"markdown\":\n", + " return gr.update(visible=True, value=chatbot_history[1][\"content\"]), gr.update(visible=False), gr.update(visible=False)\n", + " return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)\n", + "\n", + " extract_btn.click(\n", + " fn=extract_dataset_from_response,\n", + " inputs=[chatbot, file_name, format],\n", + " outputs=[file_saved, dataset_preview]\n", + " )\n", + "\n", + " chatbot.change(\n", + " fn=show_extract_btn,\n", + " inputs=[chatbot, format],\n", + " outputs=[markdown_preview, extract_btn, file_name]\n", + " )\n", + "\n", + " model_type.change(\n", + " fn=lambda x: gr.update(choices=MODEL_NAMES[x], value=MODEL_NAMES[x][0]),\n", + " inputs=[model_type],\n", + " outputs=[model_name]\n", + " )\n", + "\n", + " submit.click(\n", + " fn=run_inference,\n", + " inputs=[scenario, model_type, model_name, output_tokens, dataset_size, format],\n", + " outputs=[error, chatbot],\n", + " show_progress=True\n", + " )\n", + "\n", + " clear.click(\n", + " clear_chat,\n", + " outputs=[scenario, chatbot, dataset_preview, file_saved]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "collapsed": true, + "id": "kzDUJahK8uRN", + "outputId": "c5674be2-b262-4439-ae91-4f3e1f49e041" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n", + "* Running on public URL: https://d076a9fef9034a4f24.gradio.live\n", + "\n", + "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "scenario: Generate a dataset for training a model to approve/reject loan applications. Include features like loan amount, applicant income, co-applicant income, employment type, credit history (binary), loan term, number of dependents, education level, and loan approval status.\n", + "model_type: gpt\n", + "model_name: gpt-4o\n", + "Loan Amount,Applicant Income,Co-applicant Income,Employment Type,Credit History,Loan Term,Number of Dependents,Education Level,Loan Approval Status\n", + "250000,60000,15000,Salaried,1,240,1,Graduate,Approved\n", + "350000,80000,0,Salaried,1,360,2,Graduate,Approved\n", + "120000,30000,10000,Self-employed,0,180,1,Not Graduate,Rejected\n", + "500000,150000,50000,Self-employed,1,300,3,Graduate,Approved\n", + "75000,20000,0,Unemployed,0,120,0,Graduate,Rejected\n", + "275000,75000,25000,Salaried,0,240,2,Not Graduate,Rejected\n", + "100000,40000,20000,Salaried,1,60,0,Graduate,Approved\n", + "310000,95000,0,Self-employed,1,360,1,Graduate,Approved\n", + "450000,50000,0,Self-employed,0,180,4,Not Graduate,Rejected\n", + "200000,55000,20000,Salaried,1,120,3,Graduate,Approved\n", + "100000,35000,0,Unemployed,0,60,0,Not Graduate,Rejected\n", + "230000,68000,13000,Salaried,1,240,1,Graduate,Approved\n", + "330000,99000,40000,Self-employed,1,300,2,Graduate,Approved\n", + "150000,18000,7500,Unemployed,0,48,0,Not Graduate,Rejected\n", + "210000,64000,0,Salaried,0,120,1,Graduate,Rejected\n", + "310000,87000,30000,Self-employed,1,360,2,Graduate,Approved\n", + "50000,22000,7000,Unemployed,0,24,0,Not Graduate,Rejected\n", + "290000,92000,20000,Salaried,1,240,3,Graduate,Approved\n", + "110000,45000,0,Salaried,0,36,0,Graduate,Rejected\n", + "450000,76000,25000,Self-employed,1,360,2,Graduate,Approved\n", + "loan_approval_synthetic.txt\n", + "scenario: Generate a dataset for predicting medical appointment no-shows. Include appointment ID, scheduled date, appointment date, lead time (days between scheduling and appointment), SMS reminders sent, patient age, gender, health condition severity, and no-show status.\n", + "model_type: gpt\n", + "model_name: gpt-4o\n", + "scenario: Generate a dataset for predicting medical appointment no-shows. Include appointment ID, scheduled date, appointment date, lead time (days between scheduling and appointment), SMS reminders sent, patient age, gender, health condition severity, and no-show status.\n", + "model_type: gpt\n", + "model_name: gpt-4o\n", + "[\n", + " {\n", + " \"appointment_id\": \"AID001\",\n", + " \"scheduled_date\": \"2023-11-01\",\n", + " \"appointment_date\": \"2023-11-10\",\n", + " \"lead_time\": 9,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 45,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID002\",\n", + " \"scheduled_date\": \"2023-11-03\",\n", + " \"appointment_date\": \"2023-11-15\",\n", + " \"lead_time\": 12,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 34,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID003\",\n", + " \"scheduled_date\": \"2023-11-05\",\n", + " \"appointment_date\": \"2023-11-11\",\n", + " \"lead_time\": 6,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 29,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID004\",\n", + " \"scheduled_date\": \"2023-11-02\",\n", + " \"appointment_date\": \"2023-11-14\",\n", + " \"lead_time\": 12,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 62,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID005\",\n", + " \"scheduled_date\": \"2023-11-06\",\n", + " \"appointment_date\": \"2023-11-13\",\n", + " \"lead_time\": 7,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 21,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID006\",\n", + " \"scheduled_date\": \"2023-11-08\",\n", + " \"appointment_date\": \"2023-11-17\",\n", + " \"lead_time\": 9,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 58,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID007\",\n", + " \"scheduled_date\": \"2023-11-10\",\n", + " \"appointment_date\": \"2023-11-18\",\n", + " \"lead_time\": 8,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 41,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID008\",\n", + " \"scheduled_date\": \"2023-11-07\",\n", + " \"appointment_date\": \"2023-11-12\",\n", + " \"lead_time\": 5,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 67,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID009\",\n", + " \"scheduled_date\": \"2023-11-12\",\n", + " \"appointment_date\": \"2023-11-20\",\n", + " \"lead_time\": 8,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 74,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID010\",\n", + " \"scheduled_date\": \"2023-11-09\",\n", + " \"appointment_date\": \"2023-11-16\",\n", + " \"lead_time\": 7,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 25,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID011\",\n", + " \"scheduled_date\": \"2023-11-13\",\n", + " \"appointment_date\": \"2023-11-21\",\n", + " \"lead_time\": 8,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 32,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID012\",\n", + " \"scheduled_date\": \"2023-11-14\",\n", + " \"appointment_date\": \"2023-11-25\",\n", + " \"lead_time\": 11,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 48,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID013\",\n", + " \"scheduled_date\": \"2023-11-15\",\n", + " \"appointment_date\": \"2023-11-27\",\n", + " \"lead_time\": 12,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 36,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID014\",\n", + " \"scheduled_date\": \"2023-11-17\",\n", + " \"appointment_date\": \"2023-12-02\",\n", + " \"lead_time\": 15,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 28,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID015\",\n", + " \"scheduled_date\": \"2023-11-16\",\n", + " \"appointment_date\": \"2023-12-01\",\n", + " \"lead_time\": 15,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 60,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID016\",\n", + " \"scheduled_date\": \"2023-11-18\",\n", + " \"appointment_date\": \"2023-12-05\",\n", + " \"lead_time\": 17,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 40,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID017\",\n", + " \"scheduled_date\": \"2023-11-19\",\n", + " \"appointment_date\": \"2023-12-03\",\n", + " \"lead_time\": 14,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 19,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID018\",\n", + " \"scheduled_date\": \"2023-11-21\",\n", + " \"appointment_date\": \"2023-12-07\",\n", + " \"lead_time\": 16,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 51,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID019\",\n", + " \"scheduled_date\": \"2023-11-23\",\n", + " \"appointment_date\": \"2023-12-09\",\n", + " \"lead_time\": 16,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 55,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID020\",\n", + " \"scheduled_date\": \"2023-11-22\",\n", + " \"appointment_date\": \"2023-12-08\",\n", + " \"lead_time\": 16,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 23,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID021\",\n", + " \"scheduled_date\": \"2023-11-24\",\n", + " \"appointment_date\": \"2023-12-10\",\n", + " \"lead_time\": 16,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 47,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID022\",\n", + " \"scheduled_date\": \"2023-11-25\",\n", + " \"appointment_date\": \"2023-12-12\",\n", + " \"lead_time\": 17,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 33,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID023\",\n", + " \"scheduled_date\": \"2023-11-27\",\n", + " \"appointment_date\": \"2023-12-14\",\n", + " \"lead_time\": 17,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 42,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID024\",\n", + " \"scheduled_date\": \"2023-11-29\",\n", + " \"appointment_date\": \"2023-12-15\",\n", + " \"lead_time\": 16,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 64,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID025\",\n", + " \"scheduled_date\": \"2023-12-01\",\n", + " \"appointment_date\": \"2023-12-20\",\n", + " \"lead_time\": 19,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 26,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID026\",\n", + " \"scheduled_date\": \"2023-12-03\",\n", + " \"appointment_date\": \"2023-12-22\",\n", + " \"lead_time\": 19,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 31,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID027\",\n", + " \"scheduled_date\": \"2023-12-05\",\n", + " \"appointment_date\": \"2023-12-24\",\n", + " \"lead_time\": 19,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 50,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID028\",\n", + " \"scheduled_date\": \"2023-12-06\",\n", + " \"appointment_date\": \"2023-12-25\",\n", + " \"lead_time\": 19,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 39,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID029\",\n", + " \"scheduled_date\": \"2023-12-07\",\n", + " \"appointment_date\": \"2023-12-27\",\n", + " \"lead_time\": 20,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 71,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID030\",\n", + " \"scheduled_date\": \"2023-12-08\",\n", + " \"appointment_date\": \"2023-12-28\",\n", + " \"lead_time\": 20,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 44,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID031\",\n", + " \"scheduled_date\": \"2023-12-10\",\n", + " \"appointment_date\": \"2023-12-31\",\n", + " \"lead_time\": 21,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 38,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID032\",\n", + " \"scheduled_date\": \"2023-12-11\",\n", + " \"appointment_date\": \"2024-01-02\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 53,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID033\",\n", + " \"scheduled_date\": \"2023-12-13\",\n", + " \"appointment_date\": \"2024-01-04\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 27,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID034\",\n", + " \"scheduled_date\": \"2023-12-15\",\n", + " \"appointment_date\": \"2024-01-06\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 46,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID035\",\n", + " \"scheduled_date\": \"2023-12-17\",\n", + " \"appointment_date\": \"2024-01-09\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 68,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID036\",\n", + " \"scheduled_date\": \"2023-12-19\",\n", + " \"appointment_date\": \"2024-01-10\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 37,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID037\",\n", + " \"scheduled_date\": \"2023-12-20\",\n", + " \"appointment_date\": \"2024-01-12\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 57,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID038\",\n", + " \"scheduled_date\": \"2023-12-22\",\n", + " \"appointment_date\": \"2024-01-14\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 43,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID039\",\n", + " \"scheduled_date\": \"2023-12-23\",\n", + " \"appointment_date\": \"2024-01-16\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 65,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID040\",\n", + " \"scheduled_date\": \"2023-12-25\",\n", + " \"appointment_date\": \"2024-01-17\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 49,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID041\",\n", + " \"scheduled_date\": \"2023-12-27\",\n", + " \"appointment_date\": \"2024-01-20\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 30,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID042\",\n", + " \"scheduled_date\": \"2023-12-29\",\n", + " \"appointment_date\": \"2024-01-22\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 24,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID043\",\n", + " \"scheduled_date\": \"2024-01-01\",\n", + " \"appointment_date\": \"2024-01-25\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 72,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID044\",\n", + " \"scheduled_date\": \"2024-01-03\",\n", + " \"appointment_date\": \"2024-01-27\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 35,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID045\",\n", + " \"scheduled_date\": \"2024-01-04\",\n", + " \"appointment_date\": \"2024-01-28\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 61,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID046\",\n", + " \"scheduled_date\": \"2024-01-05\",\n", + " \"appointment_date\": \"2024-01-30\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 68,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID047\",\n", + " \"scheduled_date\": \"2024-01-07\",\n", + " \"appointment_date\": \"2024-02-01\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 22,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID048\",\n", + " \"scheduled_date\": \"2024-01-08\",\n", + " \"appointment_date\": \"2024-02-03\",\n", + " \"lead_time\": 26,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 52,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID049\",\n", + " \"scheduled_date\": \"2024-01-10\",\n", + " \"appointment_date\": \"2024-02-04\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 73,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID050\",\n", + " \"scheduled_date\": \"2024-01-12\",\n", + " \"appointment_date\": \"2024-02-06\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 56,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID051\",\n", + " \"scheduled_date\": \"2024-01-15\",\n", + " \"appointment_date\": \"2024-02-07\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 62,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID052\",\n", + " \"scheduled_date\": \"2024-01-17\",\n", + " \"appointment_date\": \"2024-02-10\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 80,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID053\",\n", + " \"scheduled_date\": \"2024-01-19\",\n", + " \"appointment_date\": \"2024-02-12\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 29,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID054\",\n", + " \"scheduled_date\": \"2024-01-21\",\n", + " \"appointment_date\": \"2024-02-13\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 66,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID055\",\n", + " \"scheduled_date\": \"2024-01-23\",\n", + " \"appointment_date\": \"2024-02-15\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 77,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID056\",\n", + " \"scheduled_date\": \"2024-01-25\",\n", + " \"appointment_date\": \"2024-02-17\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 54,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID057\",\n", + " \"scheduled_date\": \"2024-01-28\",\n", + " \"appointment_date\": \"2024-02-19\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 28,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID058\",\n", + " \"scheduled_date\": \"2024-01-30\",\n", + " \"appointment_date\": \"2024-02-22\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 45,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID059\",\n", + " \"scheduled_date\": \"2024-02-01\",\n", + " \"appointment_date\": \"2024-02-24\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 69,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID060\",\n", + " \"scheduled_date\": \"2024-02-02\",\n", + " \"appointment_date\": \"2024-02-26\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 51,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID061\",\n", + " \"scheduled_date\": \"2024-02-04\",\n", + " \"appointment_date\": \"2024-02-27\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 33,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID062\",\n", + " \"scheduled_date\": \"2024-02-06\",\n", + " \"appointment_date\": \"2024-03-01\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 84,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID063\",\n", + " \"scheduled_date\": \"2024-02-09\",\n", + " \"appointment_date\": \"2024-03-04\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 47,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID064\",\n", + " \"scheduled_date\": \"2024-02-10\",\n", + " \"appointment_date\": \"2024-03-06\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 59,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID065\",\n", + " \"scheduled_date\": \"2024-02-12\",\n", + " \"appointment_date\": \"2024-03-08\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 20,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID066\",\n", + " \"scheduled_date\": \"2024-02-14\",\n", + " \"appointment_date\": \"2024-03-10\",\n", + " \"lead_time\": 25,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 48,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID067\",\n", + " \"scheduled_date\": \"2024-02-17\",\n", + " \"appointment_date\": \"2024-03-12\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 38,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID068\",\n", + " \"scheduled_date\": \"2024-02-19\",\n", + " \"appointment_date\": \"2024-03-14\",\n", + " \"lead_time\": 24,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 76,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID069\",\n", + " \"scheduled_date\": \"2024-02-21\",\n", + " \"appointment_date\": \"2024-03-15\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 34,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID070\",\n", + " \"scheduled_date\": \"2024-02-23\",\n", + " \"appointment_date\": \"2024-03-17\",\n", + " \"lead_time\": 23,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 26,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID071\",\n", + " \"scheduled_date\": \"2024-02-25\",\n", + " \"appointment_date\": \"2024-03-19\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 22,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 2,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID072\",\n", + " \"scheduled_date\": \"2024-02-27\",\n", + " \"appointment_date\": \"2024-03-20\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 0,\n", + " \"patient_age\": 58,\n", + " \"gender\": \"Other\",\n", + " \"health_condition_severity\": 1,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID073\",\n", + " \"scheduled_date\": \"2024-02-29\",\n", + " \"appointment_date\": \"2024-03-22\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 3,\n", + " \"patient_age\": 67,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 3,\n", + " \"no_show_status\": false\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID074\",\n", + " \"scheduled_date\": \"2024-03-02\",\n", + " \"appointment_date\": \"2024-03-24\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 2,\n", + " \"patient_age\": 32,\n", + " \"gender\": \"Female\",\n", + " \"health_condition_severity\": 4,\n", + " \"no_show_status\": true\n", + " },\n", + " {\n", + " \"appointment_id\": \"AID075\",\n", + " \"scheduled_date\": \"2024-03-04\",\n", + " \"appointment_date\": \"2024-03-26\",\n", + " \"lead_time\": 22,\n", + " \"sms_reminders_sent\": 1,\n", + " \"patient_age\": 46,\n", + " \"gender\": \"Male\",\n", + " \"health_condition_severity\": 5,\n", + " \"no_show_status\": false\n", + " }\n", + "]\n", + "medical_appointment.json\n", + "scenario: Create a dataset of credit card transactions for detecting fraud. Include transaction ID, amount, timestamp, merchant category, customer location, card presence (yes/no), transaction device type, and fraud label (yes/no).\n", + "model_type: claude\n", + "model_name: claude-3-7-sonnet-latest\n", + "scenario: Create a dataset of credit card transactions for detecting fraud. Include transaction ID, amount, timestamp, merchant category, customer location, card presence (yes/no), transaction device type, and fraud label (yes/no).\n", + "model_type: claude\n", + "model_name: claude-3-7-sonnet-latest\n", + "transaction_id,amount,timestamp,merchant_category,customer_location,card_presence,device_type,fraud_label\n", + "TX123456789,45.99,2023-11-01 08:23:15,Retail,New York,Yes,POS Terminal,No\n", + "TX123456790,899.50,2023-11-01 09:45:22,Electronics,Chicago,Yes,POS Terminal,No\n", + "TX123456791,12.35,2023-11-01 10:12:45,Food & Beverage,Los Angeles,No,Mobile,No\n", + "TX123456792,5423.80,2023-11-01 11:30:18,Jewelry,Miami,No,Web Browser,Yes\n", + "TX123456793,76.24,2023-11-01 14:22:56,Groceries,Denver,Yes,POS Terminal,No\n", + "TX123456794,149.99,2023-11-02 07:15:33,Clothing,Seattle,No,Mobile,No\n", + "TX123456795,2500.00,2023-11-02 08:45:12,Electronics,Toronto,No,Web Browser,Yes\n", + "TX123456796,35.50,2023-11-02 12:33:47,Food & Beverage,Boston,Yes,POS Terminal,No\n", + "TX123456797,10.99,2023-11-02 15:20:09,Entertainment,Philadelphia,No,Mobile,No\n", + "TX123456798,750.25,2023-11-02 16:45:18,Travel,San Francisco,No,Web Browser,No\n", + "TX123456799,65.40,2023-11-02 19:22:31,Retail,Austin,Yes,POS Terminal,No\n", + "TX123456800,3299.99,2023-11-03 05:45:22,Electronics,London,No,Web Browser,Yes\n", + "TX123456801,22.50,2023-11-03 08:12:40,Food & Beverage,Atlanta,Yes,POS Terminal,No\n", + "TX123456802,129.95,2023-11-03 10:33:27,Clothing,Chicago,No,Mobile,No\n", + "TX123456803,50.00,2023-11-03 12:15:39,Gas Station,Dallas,Yes,POS Terminal,No\n", + "TX123456804,1999.00,2023-11-03 14:30:45,Electronics,Singapore,No,Web Browser,No\n", + "TX123456805,8.75,2023-11-03 18:22:14,Food & Beverage,Montreal,No,Mobile,No\n", + "TX123456806,459.99,2023-11-04 09:15:33,Home Goods,Houston,Yes,POS Terminal,No\n", + "TX123456807,2750.00,2023-11-04 10:45:28,Travel,Paris,No,Web Browser,Yes\n", + "TX123456808,85.00,2023-11-04 11:33:52,Healthcare,New York,Yes,POS Terminal,No\n", + "TX123456809,17.25,2023-11-04 13:10:44,Food & Beverage,Los Angeles,No,Mobile,No\n", + "TX123456810,150.49,2023-11-04 15:22:18,Entertainment,Miami,No,Mobile,No\n", + "TX123456811,4500.00,2023-11-04 19:45:02,Jewelry,Dubai,No,Web Browser,Yes\n", + "TX123456812,27.99,2023-11-05 08:33:27,Groceries,Seattle,Yes,POS Terminal,No\n", + "TX123456813,1250.00,2023-11-05 10:15:42,Electronics,Tokyo,No,Web Browser,No\n", + "TX123456814,56.75,2023-11-05 12:20:35,Clothing,San Diego,No,Mobile,No\n", + "TX123456815,18.50,2023-11-05 14:30:19,Food & Beverage,Denver,Yes,POS Terminal,No\n", + "TX123456816,3750.25,2023-11-05 16:45:08,Travel,Sydney,No,Web Browser,Yes\n", + "TX123456817,95.00,2023-11-05 18:22:56,Healthcare,Boston,No,Mobile,No\n", + "TX123456818,2345.67,2023-11-05 20:15:33,Electronics,Berlin,No,Web Browser,Yes\n", + "fraud_transactions.csv\n", + "scenario: Generate a dataset of investment customers with fields like portfolio value, age, income bracket, risk appetite (low/medium/high), number of transactions per month, preferred investment types, and risk score.\n", + "model_type: gemini\n", + "model_name: gemini-1.5-pro\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:tornado.access:429 POST /v1beta/models/gemini-1.5-pro:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 409.67ms\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.11/dist-packages/gradio/queueing.py\", line 625, in process_events\n", + " response = await route_utils.call_process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py\", line 322, in call_process_api\n", + " output = await app.get_blocks().process_api(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/gradio/blocks.py\", line 2181, in process_api\n", + " result = await self.call_function(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/gradio/blocks.py\", line 1692, in call_function\n", + " prediction = await anyio.to_thread.run_sync( # type: ignore\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py\", line 56, in run_sync\n", + " return await get_async_backend().run_sync_in_worker_thread(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/anyio/_backends/_asyncio.py\", line 2470, in run_sync_in_worker_thread\n", + " return await future\n", + " ^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/anyio/_backends/_asyncio.py\", line 967, in run\n", + " result = context.run(func, *args)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/gradio/utils.py\", line 889, in wrapper\n", + " response = f(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^\n", + " File \"\", line 62, in run_inference\n", + " response = get_gemini_response(prompt=user_prompt, model_name=model_name, output_tokens=output_tokens)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"\", line 30, in get_gemini_response\n", + " response = model.generate_content(prompt, generation_config={\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/generativeai/generative_models.py\", line 331, in generate_content\n", + " response = self._client.generate_content(\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/ai/generativelanguage_v1beta/services/generative_service/client.py\", line 835, in generate_content\n", + " response = rpc(\n", + " ^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/gapic_v1/method.py\", line 131, in __call__\n", + " return wrapped_func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/retry/retry_unary.py\", line 293, in retry_wrapped_func\n", + " return retry_target(\n", + " ^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/retry/retry_unary.py\", line 153, in retry_target\n", + " _retry_error_helper(\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/retry/retry_base.py\", line 212, in _retry_error_helper\n", + " raise final_exc from source_exc\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/retry/retry_unary.py\", line 144, in retry_target\n", + " result = target()\n", + " ^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/timeout.py\", line 130, in func_with_timeout\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/api_core/grpc_helpers.py\", line 76, in error_remapped_callable\n", + " return callable_(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/usr/local/lib/python3.11/dist-packages/google/ai/generativelanguage_v1beta/services/generative_service/transports/rest.py\", line 1161, in __call__\n", + " raise core_exceptions.from_http_response(response)\n", + "google.api_core.exceptions.TooManyRequests: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "scenario: Generate a dataset of investment customers with fields like portfolio value, age, income bracket, risk appetite (low/medium/high), number of transactions per month, preferred investment types, and risk score.\n", + "model_type: gemini\n", + "model_name: gemini-2.0-flash\n", + "CustomerID,PortfolioValue,Age,IncomeBracket,RiskAppetite,TransactionsPerMonth,PreferredInvestmentType,RiskScore\n", + "1,75000.00,32,Medium,High,8,\"Stocks, Options\",78\n", + "2,120000.50,45,High,Medium,3,\"Bonds, Mutual Funds\",55\n", + "3,30000.75,28,Low,Low,1,\"Bonds\",25\n", + "4,250000.00,58,High,High,12,\"Stocks, Real Estate\",85\n", + "5,80000.25,39,Medium,Medium,5,\"Mutual Funds\",60\n", + "6,150000.00,48,High,Low,2,\"Bonds, ETFs\",40\n", + "7,45000.50,25,Low,Medium,4,\"Stocks\",50\n", + "8,300000.75,62,High,High,15,\"Stocks, Options, Real Estate\",92\n", + "9,90000.00,35,Medium,Medium,6,\"ETFs, Mutual Funds\",65\n", + "10,180000.25,50,High,Low,1,\"Bonds\",35\n", + "11,60000.50,29,Low,Low,2,\"Bonds, ETFs\",30\n", + "12,400000.00,65,High,High,18,\"Stocks, Options, Cryptocurrency\",95\n", + "13,100000.75,42,Medium,Medium,7,\"Mutual Funds, Real Estate\",70\n", + "14,200000.00,55,High,Low,0,\"Bonds, Annuities\",20\n", + "15,70000.25,31,Low,Medium,3,\"Stocks, ETFs\",58\n", + "16,130000.50,47,High,Medium,4,\"Bonds, Mutual Funds\",52\n", + "17,35000.75,27,Low,Low,1,\"Bonds\",28\n", + "18,280000.00,60,High,High,14,\"Stocks, Real Estate\",88\n", + "19,85000.25,37,Medium,Medium,5,\"ETFs\",63\n", + "20,160000.00,52,High,Low,2,\"Bonds, CDs\",38\n", + "21,50000.50,26,Low,Low,1,\"Bonds, Government Securities\",22\n", + "22,450000.75,68,High,High,20,\"Stocks, Options, Venture Capital\",97\n", + "23,110000.00,44,Medium,Medium,8,\"Mutual Funds, ETFs\",73\n", + "24,220000.25,57,High,Low,0,\"Bonds, Treasury Bills\",18\n", + "25,72000.50,33,Low,Medium,4,\"Stocks\",56\n", + "26,140000.00,49,High,Medium,3,\"Bonds, Mutual Funds\",54\n", + "27,32000.75,29,Low,Low,1,\"Bonds\",26\n", + "28,260000.00,61,High,High,13,\"Stocks, Real Estate\",86\n", + "29,82000.25,38,Medium,Medium,6,\"ETFs, Index Funds\",61\n", + "30,170000.50,53,High,Low,2,\"Bonds\",36\n", + "31,55000.75,24,Low,Low,2,\"Bonds, Money Market Accounts\",24\n", + "32,350000.00,64,High,High,17,\"Stocks, Options, Commodities\",93\n", + "33,95000.25,41,Medium,Medium,7,\"Mutual Funds, REITs\",68\n", + "34,190000.50,56,High,Low,0,\"Bonds, Fixed Income\",19\n", + "35,65000.00,30,Low,Medium,3,\"Stocks, Small Cap Stocks\",59\n", + "36,125000.75,46,High,Medium,4,\"Bonds, Large Cap Funds\",51\n", + "37,33000.25,28,Low,Low,1,\"Bonds\",27\n", + "38,270000.50,59,High,High,14,\"Stocks, Emerging Markets\",87\n", + "39,88000.00,36,Medium,Medium,5,\"ETFs, Balanced Funds\",64\n", + "40,155000.75,51,High,Low,2,\"Bonds, Corporate Bonds\",37\n", + "41,48000.25,25,Low,Low,1,\"Bonds, Municipal Bonds\",21\n", + "42,420000.00,67,High,High,19,\"Stocks, Options, Derivatives\",96\n", + "43,105000.75,43,Medium,Medium,8,\"Mutual Funds, Sector Funds\",71\n", + "44,210000.00,54,High,Low,0,\"Bonds, Government Bonds\",17\n", + "45,71000.25,32,Low,Medium,4,\"Stocks\",57\n", + "46,135000.50,48,High,Medium,3,\"Bonds, Index Funds\",53\n", + "47,34000.75,27,Low,Low,1,\"Bonds\",29\n", + "48,290000.00,63,High,High,16,\"Stocks, Real Estate, Private Equity\",90\n", + "49,89000.25,40,Medium,Medium,6,\"ETFs\",62\n", + "50,175000.50,50,High,Low,2,\"Bonds, Preferred Stocks\",39\n", + "investment_customers.csv\n", + "scenario: Generate a dataset for predicting customer churn in a subscription-based telecom company. Include features like monthly charges, contract type, tenure (in months), number of support calls, internet usage (in GB), payment method, and whether the customer has churned.\n", + "model_type: gemini\n", + "model_name: gemini-2.0-flash\n", + "scenario: Generate a dataset for predicting customer churn in a subscription-based telecom company. Include features like monthly charges, contract type, tenure (in months), number of support calls, internet usage (in GB), payment method, and whether the customer has churned.\n", + "model_type: gemini\n", + "model_name: gemini-2.0-flash\n", + "\n", + "testinggemini.json\n", + "scenario: Generate a dataset for predicting customer churn in a subscription-based telecom company. Include features like monthly charges, contract type, tenure (in months), number of support calls, internet usage (in GB), payment method, and whether the customer has churned.\n", + "model_type: gemini\n", + "model_name: gemini-2.0-flash\n", + "CustomerID,MonthlyCharges,ContractType,Tenure,SupportCalls,InternetUsage,PaymentMethod,Churned\n", + "TEL2847592374,67.55,Month-to-Month,9,3,145.2,Electronic Check,Yes\n", + "TEL9283746510,92.30,One Year,48,1,87.9,Credit Card,No\n", + "TEL1837465921,25.00,Month-to-Month,2,0,25.6,Mailed Check,Yes\n", + "TEL7364582910,115.75,Two Year,65,2,203.4,Bank Transfer,No\n", + "TEL5928374615,48.20,Month-to-Month,15,4,98.7,Electronic Check,Yes\n", + "TEL3847592016,78.90,One Year,36,1,167.1,Credit Card,No\n", + "TEL8273645910,31.50,Month-to-Month,3,0,30.2,Mailed Check,Yes\n", + "TEL6354789210,102.40,Two Year,70,3,185.9,Bank Transfer,No\n", + "TEL4738291056,55.85,Month-to-Month,11,2,112.5,Electronic Check,Yes\n", + "TEL1928374650,85.60,One Year,42,1,76.3,Credit Card,No\n", + "TEL7463529108,28.75,Month-to-Month,5,0,28.9,Mailed Check,Yes\n", + "TEL5293847610,110.30,Two Year,68,2,192.7,Bank Transfer,No\n", + "TEL3647582910,62.10,Month-to-Month,13,3,134.8,Electronic Check,Yes\n", + "TEL9182736450,98.45,One Year,39,1,91.5,Credit Card,No\n", + "TEL2736458109,34.90,Month-to-Month,7,0,33.6,Mailed Check,Yes\n", + "TEL8547392016,107.60,Two Year,62,2,179.3,Bank Transfer,No\n", + "TEL6192837450,59.35,Month-to-Month,10,3,123.4,Electronic Check,Yes\n", + "TEL4928374651,82.90,One Year,45,1,82.1,Credit Card,No\n", + "TEL1635294810,22.50,Month-to-Month,4,0,22.3,Mailed Check,Yes\n", + "TEL7283746509,118.20,Two Year,71,2,210.5,Bank Transfer,No\n", + "TEL5829374610,69.70,Month-to-Month,12,3,156.9,Electronic Check,Yes\n", + "TEL3918273640,95.15,One Year,40,1,89.7,Credit Card,No\n", + "TEL9374628105,37.40,Month-to-Month,6,0,36.2,Mailed Check,Yes\n", + "TEL6458293710,104.90,Two Year,67,2,188.1,Bank Transfer,No\n", + "TEL4829374615,57.10,Month-to-Month,14,3,118.2,Electronic Check,Yes\n", + "TEL1536472910,80.55,One Year,43,1,78.9,Credit Card,No\n", + "TEL7192837465,25.30,Month-to-Month,2,0,25.9,Mailed Check,Yes\n", + "TEL5374829106,112.90,Two Year,69,2,195.3,Bank Transfer,No\n", + "TEL3746582910,64.85,Month-to-Month,8,3,140.6,Electronic Check,Yes\n", + "TEL9263548107,90.20,One Year,46,1,85.5,Credit Card,No\n", + "TEL2635478109,32.65,Month-to-Month,4,0,31.4,Mailed Check,Yes\n", + "TEL8473920165,109.70,Two Year,63,2,182.5,Bank Transfer,No\n", + "TEL6283749105,54.50,Month-to-Month,16,3,110.1,Electronic Check,Yes\n", + "TEL4192837460,77.30,One Year,41,1,75.2,Credit Card,No\n", + "TEL1746352910,29.90,Month-to-Month,5,0,29.6,Mailed Check,Yes\n", + "TEL7382910564,117.10,Two Year,72,2,207.9,Bank Transfer,No\n", + "TEL5928374610,72.00,Month-to-Month,13,3,159.7,Electronic Check,Yes\n", + "TEL3847592016,97.85,One Year,38,1,93.2,Credit Card,No\n", + "TEL9182736450,39.55,Month-to-Month,7,0,38.3,Mailed Check,Yes\n", + "TEL6354789210,106.30,Two Year,66,2,190.8,Bank Transfer,No\n", + "TEL4738291056,51.75,Month-to-Month,11,3,105.9,Electronic Check,Yes\n", + "TEL1928374650,74.60,One Year,44,1,73.1,Credit Card,No\n", + "TEL7463529108,27.10,Month-to-Month,3,0,26.7,Mailed Check,Yes\n", + "TEL5293847610,114.50,Two Year,70,2,198.6,Bank Transfer,No\n", + "TEL3647582910,66.45,Month-to-Month,12,3,138.5,Electronic Check,Yes\n", + "TEL9182736450,93.50,One Year,47,1,84.2,Credit Card,No\n", + "TEL2736458109,35.15,Month-to-Month,6,0,34.9,Mailed Check,Yes\n", + "TEL8547392016,103.80,Two Year,64,2,176.1,Bank Transfer,No\n", + "TEL6192837450,58.20,Month-to-Month,14,3,120.7,Electronic Check,Yes\n", + "TEL4928374651,81.65,One Year,41,1,80.5,Credit Card,No\n", + "TEL1635294810,23.70,Month-to-Month,5,0,23.4,Mailed Check,Yes\n", + "TEL7283746509,119.90,Two Year,68,2,213.2,Bank Transfer,No\n", + "TEL5829374610,70.85,Month-to-Month,9,3,153.7,Electronic Check,Yes\n", + "TEL3918273640,96.20,One Year,45,1,92.4,Credit Card,No\n", + "TEL9374628105,36.80,Month-to-Month,7,0,35.6,Mailed Check,Yes\n", + "TEL6458293710,105.50,Two Year,69,2,185.4,Bank Transfer,No\n", + "TEL4829374615,56.30,Month-to-Month,15,3,115.1,Electronic Check,Yes\n", + "TEL1536472910,79.40,One Year,42,1,77.8,Credit Card,No\n", + "TEL7192837465,24.50,Month-to-Month,4,0,24.2,Mailed Check,Yes\n", + "TEL5374829106,111.80,Two Year,67,2,193.9,Bank Transfer,No\n", + "TEL3746582910,63.70,Month-to-Month,10,3,137.4,Electronic Check,Yes\n", + "TEL9263548107,89.10,One Year,40,1,83.9,Credit Card,No\n", + "TEL2635478109,33.85,Month-to-Month,6,0,32.5,Mailed Check,Yes\n", + "TEL8473920165,108.60,Two Year,65,2,179.9,Bank Transfer,No\n", + "TEL6283749105,53.40,Month-to-Month,11,3,107.8,Electronic Check,Yes\n", + "TEL4192837460,76.20,One Year,43,1,74.1,Credit Card,No\n", + "TEL1746352910,30.50,Month-to-Month,5,0,30.2,Mailed Check,Yes\n", + "TEL7382910564,116.00,Two Year,71,2,205.3,Bank Transfer,No\n", + "TEL5928374610,71.15,Month-to-Month,16,3,157.6,Electronic Check,Yes\n", + "TEL3847592016,97.00,One Year,39,1,90.9,Credit Card,No\n", + "TEL9182736450,38.70,Month-to-Month,3,0,37.4,Mailed Check,Yes\n", + "TEL6354789210,105.20,Two Year,68,2,188.7,Bank Transfer,No\n", + "TEL4738291056,52.55,Month-to-Month,14,3,104.2,Electronic Check,Yes\n", + "TEL1928374650,75.40,One Year,46,1,72.4,Credit Card,No\n", + "TEL7463529108,26.30,Month-to-Month,2,0,26.0,Mailed Check,Yes\n", + "TEL5293847610,113.70,Two Year,66,2,196.8,Bank Transfer,No\n", + "TEL3647582910,65.60,Month-to-Month,15,3,139.1,Electronic Check,Yes\n", + "TEL9182736450,94.35,One Year,42,1,86.8,Credit Card,No\n", + "TEL2736458109,34.30,Month-to-Month,4,0,34.0,Mailed Check,Yes\n", + "TEL8547392016,102.70,Two Year,63,2,173.5,Bank Transfer,No\n", + "TEL6192837450,59.90,Month-to-Month,13,3,121.3,Electronic Check,Yes\n", + "TEL4928374651,82.20,One Year,47,1,79.2,Credit Card,No\n", + "TEL1635294810,23.10,Month-to-Month,6,0,22.8,Mailed Check,Yes\n", + "TEL7283746509,119.30,Two Year,69,2,211.6,Bank Transfer,No\n", + "TEL5829374610,71.40,Month-to-Month,10,3,154.3,Electronic Check,Yes\n", + "TEL3918273640,96.70,One Year,44,1,91.7,Credit Card,No\n", + "TEL9374628105,37.10,Month-to-Month,5,0,36.8,Mailed Check,Yes\n", + "TEL6458293710,106.00,Two Year,70,2,186.1,Bank Transfer,No\n", + "TEL4829374615,55.70,Month-to-Month,12,3,112.0,Electronic Check,Yes\n", + "TEL1536472910,78.80,One Year,41,1,76.5,Credit Card,No\n", + "TEL7192837465,25.00,Month-to-Month,7,0,24.7,Mailed Check,Yes\n", + "TEL5374829106,111.20,Two Year,64,2,191.3,Bank Transfer,No\n", + "TEL3746582910,64.20,Month-to-Month,14,3,136.1,Electronic Check,Yes\n", + "TEL9263548107,90.80,One Year,43,1,82.6,Credit Card,No\n", + "TEL2635478109,33.20,Month-to-Month,5,0,31.9,Mailed Check,Yes\n", + "TEL8473920165,109.10,Two Year,67,2,177.4,Bank Transfer,No\n", + "TEL6283749105,54.00,Month-to-Month,16,3,109.4,Electronic Check,Yes\n", + "TEL4192837460,75.60,One Year,40,1,73.4,Credit Card,No\n", + "TEL1746352910,31.10,Month-to-Month,3,0,30.8,Mailed Check,Yes\n", + "TEL7382910564,115.40,Two Year,65,2,202.7,Bank Transfer,No\n", + "testinggemini.txt\n", + "Keyboard interruption in main thread... closing server.\n", + "Killing tunnel 127.0.0.1:7860 <> https://d076a9fef9034a4f24.gradio.live\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "# Example Scenarios\n", + "\n", + "# Generate a dataset for predicting customer churn in a subscription-based telecom company. Include features like monthly charges, contract type, tenure (in months), number of support calls, internet usage (in GB), payment method, and whether the customer has churned.\n", + "# Generate a dataset for training a model to approve/reject loan applications. Include features like loan amount, applicant income, co-applicant income, employment type, credit history (binary), loan term, number of dependents, education level, and loan approval status.\n", + "# Create a dataset of credit card transactions for detecting fraud. Include transaction ID, amount, timestamp, merchant category, customer location, card presence (yes/no), transaction device type, and fraud label (yes/no).\n", + "# Generate a dataset of investment customers with fields like portfolio value, age, income bracket, risk appetite (low/medium/high), number of transactions per month, preferred investment types, and risk score.\n", + "# Create a dataset of hospitalized patients to predict readmission within 30 days. Include patient ID, age, gender, number of prior admissions, diagnosis codes, length of stay, discharge type, medications prescribed, and readmission label.\n", + "# Generate a dataset for predicting medical appointment no-shows. Include appointment ID, scheduled date, appointment date, lead time (days between scheduling and appointment), SMS reminders sent, patient age, gender, health condition severity, and no-show status.\n", + "\n", + "generator_ui.launch(share=True, debug=True, inbrowser=True)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "_9HIC_AzfZBZ" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file