Modify readme and app
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
# Synthetic Data Generator
|
||||
**NOTE:** This is a copy of the repository https://github.com/Jsrodrigue/synthetic-data-creator.
|
||||
|
||||
# Synthetic Data Generator
|
||||
|
||||
An intelligent synthetic data generator that uses OpenAI models to create realistic tabular datasets based on reference data. This project includes an intuitive web interface built with Gradio.
|
||||
|
||||
> **🎓 Educational Project**: This project was inspired by the highly regarded LLM Engineering course on Udemy: [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099). It demonstrates practical applications of LLM engineering principles, prompt engineering, and synthetic data generation techniques.
|
||||
@@ -61,7 +63,7 @@ An intelligent synthetic data generator that uses OpenAI models to create realis
|
||||
- Python 3.12+
|
||||
- OpenAI account with API key
|
||||
|
||||
### Installation with pip
|
||||
### Option 1: Using pip
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python -m venv venv
|
||||
@@ -71,11 +73,11 @@ source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Installation with uv
|
||||
### Option 2: Using uv
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/Jsrodrigue/synthetic-data-creator.git
|
||||
cd synthetic_data
|
||||
cd synthetic-data-creator
|
||||
|
||||
# Install dependencies
|
||||
uv sync
|
||||
@@ -100,8 +102,15 @@ OPENAI_API_KEY=your_api_key_here
|
||||
## 🎯 Usage
|
||||
|
||||
### Start the application
|
||||
|
||||
You can run the app either with **Python** or with **uv** (recommended if you installed dependencies using `uv sync`):
|
||||
|
||||
```bash
|
||||
# Option 1: using Python
|
||||
python app.py
|
||||
|
||||
# Option 2: using uv (no need to activate venv manually)
|
||||
uv run app.py
|
||||
```
|
||||
|
||||
The script will print a local URL (e.g., http://localhost:7860) — open that link in your browser.
|
||||
|
||||
@@ -1,131 +1,156 @@
|
||||
import gradio as gr
|
||||
import os
|
||||
import atexit
|
||||
from src.IO_utils import cleanup_temp_files
|
||||
from src.data_generation import generate_and_evaluate_data
|
||||
from src.plot_utils import display_reference_csv
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
import gradio as gr
|
||||
import openai
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from src.constants import PROJECT_TEMP_DIR, SYSTEM_PROMPT, USER_PROMPT
|
||||
from src.data_generation import generate_and_evaluate_data
|
||||
from src.IO_utils import cleanup_temp_files
|
||||
from src.plot_utils import display_reference_csv
|
||||
|
||||
# ==========================================================
|
||||
# Setup
|
||||
# ==========================================================
|
||||
|
||||
# Load the API key
|
||||
load_dotenv()
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
def main():
|
||||
# ==========================================================
|
||||
# Setup
|
||||
# ==========================================================
|
||||
|
||||
# Temporary folder for images
|
||||
os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)
|
||||
# Load the API key
|
||||
load_dotenv()
|
||||
openai.api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
# Ensure temporary plot images are deleted when the program exits
|
||||
atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))
|
||||
# Temporary folder for images
|
||||
os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)
|
||||
|
||||
# ==========================================================
|
||||
# Gradio App
|
||||
# ==========================================================
|
||||
with gr.Blocks() as demo:
|
||||
# Ensure temporary plot images are deleted when the program exits
|
||||
atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))
|
||||
|
||||
# Store temp folder in state
|
||||
temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)
|
||||
# ==========================================================
|
||||
# Gradio App
|
||||
# ==========================================================
|
||||
with gr.Blocks() as demo:
|
||||
|
||||
gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")
|
||||
# Store temp folder in state
|
||||
temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)
|
||||
|
||||
# ======================================================
|
||||
# Tabs for organized sections
|
||||
# ======================================================
|
||||
with gr.Tabs():
|
||||
gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")
|
||||
|
||||
# ------------------------------
|
||||
# Tab 1: Input
|
||||
# ------------------------------
|
||||
with gr.Tab("Input"):
|
||||
# ======================================================
|
||||
# Tabs for organized sections
|
||||
# ======================================================
|
||||
with gr.Tabs():
|
||||
|
||||
# System prompt in collapsible
|
||||
with gr.Accordion("System Prompt (click to expand)", open=False):
|
||||
system_prompt_input = gr.Textbox(
|
||||
label="System Prompt",
|
||||
value=SYSTEM_PROMPT,
|
||||
lines=20
|
||||
# ------------------------------
|
||||
# Tab 1: Input
|
||||
# ------------------------------
|
||||
with gr.Tab("Input"):
|
||||
|
||||
# System prompt in collapsible
|
||||
with gr.Accordion("System Prompt (click to expand)", open=False):
|
||||
system_prompt_input = gr.Textbox(
|
||||
label="System Prompt", value=SYSTEM_PROMPT, lines=20
|
||||
)
|
||||
|
||||
# User prompt box
|
||||
user_prompt_input = gr.Textbox(
|
||||
label="User Prompt", value=USER_PROMPT, lines=5
|
||||
)
|
||||
|
||||
# User prompt box
|
||||
user_prompt_input = gr.Textbox(label="User Prompt", value=USER_PROMPT, lines=5)
|
||||
# Model selection
|
||||
model_select = gr.Dropdown(
|
||||
label="OpenAI Model",
|
||||
choices=["gpt-4o-mini", "gpt-4.1-mini"],
|
||||
value="gpt-4o-mini",
|
||||
)
|
||||
|
||||
# Model selection
|
||||
model_select = gr.Dropdown(
|
||||
label="OpenAI Model",
|
||||
choices=["gpt-4o-mini", "gpt-4.1-mini"],
|
||||
value="gpt-4o-mini"
|
||||
)
|
||||
# Reference CSV upload
|
||||
reference_input = gr.File(
|
||||
label="Reference CSV (optional)", file_types=[".csv"]
|
||||
)
|
||||
|
||||
# Reference CSV upload
|
||||
reference_input = gr.File(label="Reference CSV (optional)", file_types=[".csv"])
|
||||
# Examples
|
||||
gr.Examples(
|
||||
examples=[
|
||||
"data/sentiment_reference.csv",
|
||||
"data/people_reference.csv",
|
||||
"data/wine_reference.csv",
|
||||
],
|
||||
inputs=reference_input,
|
||||
)
|
||||
|
||||
# Examples
|
||||
gr.Examples(
|
||||
examples=["data/sentiment_reference.csv","data/people_reference.csv","data/wine_reference.csv"],
|
||||
inputs=reference_input
|
||||
)
|
||||
# Generate button
|
||||
generate_btn = gr.Button("🚀 Generate Data")
|
||||
|
||||
# Generate button
|
||||
generate_btn = gr.Button("🚀 Generate Data")
|
||||
# Download output (file component for the generated CSV)
|
||||
download_csv = gr.File(label="Download CSV")
|
||||
|
||||
# Download output (file component for the generated CSV)
|
||||
download_csv = gr.File(label="Download CSV")
|
||||
# ------------------------------
|
||||
# Tab 2: Reference Table
|
||||
# ------------------------------
|
||||
with gr.Tab("Reference Table"):
|
||||
reference_display = gr.DataFrame(label="Reference CSV Preview")
|
||||
|
||||
# ------------------------------
|
||||
# Tab 2: Reference Table
|
||||
# ------------------------------
|
||||
with gr.Tab("Reference Table"):
|
||||
reference_display = gr.DataFrame(label="Reference CSV Preview")
|
||||
# ------------------------------
|
||||
# Tab 3: Generated Table
|
||||
# ------------------------------
|
||||
with gr.Tab("Generated Table"):
|
||||
output_df = gr.DataFrame(label="Generated Data")
|
||||
|
||||
# ------------------------------
|
||||
# Tab 3: Generated Table
|
||||
# ------------------------------
|
||||
with gr.Tab("Generated Table"):
|
||||
output_df = gr.DataFrame(label="Generated Data")
|
||||
|
||||
# ------------------------------
|
||||
# Tab 4: Comparison (evaluation results)
|
||||
# ------------------------------
|
||||
with gr.Tab("Comparison"):
|
||||
with gr.Accordion("Evaluation Results (click to expand)", open=True):
|
||||
evaluation_df = gr.DataFrame(label="Evaluation Results")
|
||||
|
||||
# ------------------------------
|
||||
# Tab 4: Comparison (evaluation results)
|
||||
# ------------------------------
|
||||
with gr.Tab("Comparison"):
|
||||
with gr.Accordion("Evaluation Results (click to expand)", open=True):
|
||||
evaluation_df = gr.DataFrame(label="Evaluation Results")
|
||||
# ------------------------------
|
||||
# Tab 5: Visualizations
|
||||
# ------------------------------
|
||||
|
||||
# ------------------------------
|
||||
# Tab 5: Visualizations
|
||||
# ------------------------------
|
||||
with gr.Tab("Visualizations"):
|
||||
gr.Markdown("# Click on the box to expand")
|
||||
|
||||
with gr.Tab("Visualizations"):
|
||||
gr.Markdown("# Click on the box to expand")
|
||||
|
||||
images_gallery = gr.Gallery(
|
||||
label="Column Visualizations",
|
||||
show_label=True,
|
||||
columns=2,
|
||||
height='auto',
|
||||
interactive=True
|
||||
)
|
||||
images_gallery = gr.Gallery(
|
||||
label="Column Visualizations",
|
||||
show_label=True,
|
||||
columns=2,
|
||||
height="auto",
|
||||
interactive=True,
|
||||
)
|
||||
|
||||
# Hidden state for internal use
|
||||
generated_state = gr.State()
|
||||
# Hidden state for internal use
|
||||
generated_state = gr.State()
|
||||
|
||||
# ======================================================
|
||||
# Event bindings
|
||||
# ======================================================
|
||||
generate_btn.click(
|
||||
fn=generate_and_evaluate_data,
|
||||
inputs=[system_prompt_input, user_prompt_input, temp_dir_state, reference_input, model_select],
|
||||
outputs=[output_df, download_csv, evaluation_df, generated_state, images_gallery]
|
||||
)
|
||||
# ======================================================
|
||||
# Event bindings
|
||||
# ======================================================
|
||||
generate_btn.click(
|
||||
fn=generate_and_evaluate_data,
|
||||
inputs=[
|
||||
system_prompt_input,
|
||||
user_prompt_input,
|
||||
temp_dir_state,
|
||||
reference_input,
|
||||
model_select,
|
||||
],
|
||||
outputs=[
|
||||
output_df,
|
||||
download_csv,
|
||||
evaluation_df,
|
||||
generated_state,
|
||||
images_gallery,
|
||||
],
|
||||
)
|
||||
|
||||
reference_input.change(
|
||||
fn=display_reference_csv,
|
||||
inputs=[reference_input],
|
||||
outputs=[reference_display]
|
||||
)
|
||||
reference_input.change(
|
||||
fn=display_reference_csv,
|
||||
inputs=[reference_input],
|
||||
outputs=[reference_display],
|
||||
)
|
||||
|
||||
demo.launch(debug=True)
|
||||
demo.launch(debug=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user