Files
LLM_Engineering_OLD/week3/community-contributions/juan_synthetic_data/app.py
2025-10-23 15:29:54 +01:00

132 lines
4.3 KiB
Python

import gradio as gr
import os
import atexit
from src.IO_utils import cleanup_temp_files
from src.data_generation import generate_and_evaluate_data
from src.plot_utils import display_reference_csv
from dotenv import load_dotenv
import openai
from src.constants import PROJECT_TEMP_DIR, SYSTEM_PROMPT, USER_PROMPT
# ==========================================================
# Setup
# ==========================================================
# Populate the environment via python-dotenv, then hand the OpenAI key to
# the client library. os.getenv yields None when the variable is not set.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Temporary folder for images; exist_ok makes repeated start-ups harmless.
os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)

# Register the cleanup of the temporary folder's files to run when the
# program exits.
atexit.register(cleanup_temp_files, PROJECT_TEMP_DIR)
# ==========================================================
# Gradio App
# ==========================================================
with gr.Blocks() as demo:
    # The temp folder path lives in a State component so it can be passed
    # to the generate callback like any other input.
    temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)

    gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")

    # ======================================================
    # Tabs for organized sections
    # ======================================================
    with gr.Tabs():
        # ------------------------------
        # Tab 1: Input
        # ------------------------------
        with gr.Tab("Input"):
            # System prompt, collapsed by default
            with gr.Accordion("System Prompt (click to expand)", open=False):
                system_prompt_input = gr.Textbox(
                    label="System Prompt",
                    value=SYSTEM_PROMPT,
                    lines=20,
                )

            # User prompt box
            user_prompt_input = gr.Textbox(
                label="User Prompt",
                value=USER_PROMPT,
                lines=5,
            )

            # Model selection
            model_select = gr.Dropdown(
                label="OpenAI Model",
                choices=["gpt-4o-mini", "gpt-4.1-mini"],
                value="gpt-4o-mini",
            )

            # Optional reference CSV upload
            reference_input = gr.File(
                label="Reference CSV (optional)",
                file_types=[".csv"],
            )

            # Example reference files that fill the upload component
            gr.Examples(
                examples=[
                    "data/sentiment_reference.csv",
                    "data/people_reference.csv",
                    "data/wine_reference.csv",
                ],
                inputs=reference_input,
            )

            # Generate button
            generate_btn = gr.Button("🚀 Generate Data")

            # File component that receives the generated CSV for download
            download_csv = gr.File(label="Download CSV")

        # ------------------------------
        # Tab 2: Reference Table
        # ------------------------------
        with gr.Tab("Reference Table"):
            reference_display = gr.DataFrame(label="Reference CSV Preview")

        # ------------------------------
        # Tab 3: Generated Table
        # ------------------------------
        with gr.Tab("Generated Table"):
            output_df = gr.DataFrame(label="Generated Data")

        # ------------------------------
        # Tab 4: Evaluation (shown under the "Comparison" label)
        # ------------------------------
        with gr.Tab("Comparison"):
            with gr.Accordion("Evaluation Results (click to expand)", open=True):
                evaluation_df = gr.DataFrame(label="Evaluation Results")

        # ------------------------------
        # Tab 5: Visualizations
        # ------------------------------
        with gr.Tab("Visualizations"):
            gr.Markdown("# Click on the box to expand")
            images_gallery = gr.Gallery(
                label="Column Visualizations",
                show_label=True,
                columns=2,
                height="auto",
                interactive=True,
            )

    # Hidden state for internal use; filled by the generate callback
    generated_state = gr.State()

    # ======================================================
    # Event bindings
    # ======================================================
    # Clicking the button runs generation + evaluation; the callback's five
    # return values map positionally onto the five output components.
    generate_btn.click(
        fn=generate_and_evaluate_data,
        inputs=[
            system_prompt_input,
            user_prompt_input,
            temp_dir_state,
            reference_input,
            model_select,
        ],
        outputs=[
            output_df,
            download_csv,
            evaluation_df,
            generated_state,
            images_gallery,
        ],
    )

    # Refresh the reference preview table whenever the uploaded file changes
    reference_input.change(
        fn=display_reference_csv,
        inputs=[reference_input],
        outputs=[reference_display],
    )
# Start the server only when this file is executed as a script. Without the
# guard, merely importing this module (e.g. to reuse `demo`) would launch
# the app, and with debug=True that call blocks the importing thread.
if __name__ == "__main__":
    demo.launch(debug=True)