Modify readme and app

2025-10-23 23:40:53 +01:00
parent 101b0baf62
commit 9c5d5fb99e
2 changed files with 139 additions and 105 deletions
--- a/week3/community-contributions/juan_synthetic_data/README.md
+++ b/week3/community-contributions/juan_synthetic_data/README.md
@@ -1,6 +1,8 @@
 # Synthetic Data Generator
 **NOTE:** This is a copy of the repository https://github.com/Jsrodrigue/synthetic-data-creator.
 # Synthetic Data Generator
 An intelligent synthetic data generator that uses OpenAI models to create realistic tabular datasets based on reference data. This project includes an intuitive web interface built with Gradio.
 > **🎓 Educational Project**: This project was inspired by the highly regarded LLM Engineering course on Udemy: [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099). It demonstrates practical applications of LLM engineering principles, prompt engineering, and synthetic data generation techniques.
@@ -61,7 +63,7 @@ An intelligent synthetic data generator that uses OpenAI models to create realis
 - Python 3.12+
 - OpenAI account with API key
-### Installation with pip
+### Option 1: Using pip
 ```bash
 # Create virtual environment
 python -m venv venv
@@ -71,11 +73,11 @@ source venv/bin/activate  # On Windows: venv\Scripts\activate
 pip install -r requirements.txt
 ```
-### Installation with uv
+### Option 2: Using uv
 ```bash
 # Clone the repository
 git clone https://github.com/Jsrodrigue/synthetic-data-creator.git
-cd synthetic_data
+cd synthetic-data-creator
 # Install dependencies
 uv sync
@@ -100,8 +102,15 @@ OPENAI_API_KEY=your_api_key_here
 ## 🎯 Usage
 ### Start the application
 You can run the app either with **Python** or with **uv** (recommended if you installed dependencies using `uv sync`):
 ```bash
 # Option 1: using Python
 python app.py
 # Option 2: using uv (no need to activate venv manually)
 uv run app.py
 ```
 The script will print a local URL (e.g., http://localhost:7860) — open that link in your browser.
--- a/week3/community-contributions/juan_synthetic_data/app.py
+++ b/week3/community-contributions/juan_synthetic_data/app.py
@@ -1,131 +1,156 @@
 import gradio as gr
 import os
 import atexit
-from src.IO_utils import cleanup_temp_files
+import os
-from src.data_generation import generate_and_evaluate_data
+
-from src.plot_utils import display_reference_csv
+import gradio as gr
 from dotenv import load_dotenv
 import openai
 from dotenv import load_dotenv
 from src.constants import PROJECT_TEMP_DIR, SYSTEM_PROMPT, USER_PROMPT
 from src.data_generation import generate_and_evaluate_data
 from src.IO_utils import cleanup_temp_files
 from src.plot_utils import display_reference_csv
 # ==========================================================
 # Setup
 # ==========================================================
-#Load the api key
+def main():
-load_dotenv()
+    # ==========================================================
-openai.api_key = os.getenv("OPENAI_API_KEY")
+    # Setup
    # ==========================================================
-# Temporary folder for images
+    # Load the OpenAI API key from the environment (.env file)
-os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)
+    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")
-# Ensure temporary plot images are deleted when the program exits
+    # Temporary folder for images
-atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))
+    os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)
-# ==========================================================
+    # Ensure temporary plot images are deleted when the program exits
-# Gradio App
+    atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))
 # ==========================================================
 with gr.Blocks() as demo:
-    # Store temp folder in state
+    # ==========================================================
-    temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)
+    # Gradio App
    # ==========================================================
    with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")
+        # Store temp folder in state
        temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)
-    # ======================================================
+        gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")
    # Tabs for organized sections
    # ======================================================
    with gr.Tabs():
-        # ------------------------------
+        # ======================================================
-        # Tab 1: Input
+        # Tabs for organized sections
-        # ------------------------------
+        # ======================================================
-        with gr.Tab("Input"):
+        with gr.Tabs():
-            # System prompt in collapsible
+            # ------------------------------
-            with gr.Accordion("System Prompt (click to expand)", open=False):
+            # Tab 1: Input
-                system_prompt_input = gr.Textbox(
+            # ------------------------------
-                    label="System Prompt",
+            with gr.Tab("Input"):
-                    value=SYSTEM_PROMPT,
+
-                    lines=20
+                # System prompt in collapsible
                with gr.Accordion("System Prompt (click to expand)", open=False):
                    system_prompt_input = gr.Textbox(
                        label="System Prompt", value=SYSTEM_PROMPT, lines=20
                    )
                # User prompt box
                user_prompt_input = gr.Textbox(
                    label="User Prompt", value=USER_PROMPT, lines=5
                )
-            # User prompt box
+                # Model selection
-            user_prompt_input = gr.Textbox(label="User Prompt", value=USER_PROMPT, lines=5)
+                model_select = gr.Dropdown(
                    label="OpenAI Model",
                    choices=["gpt-4o-mini", "gpt-4.1-mini"],
                    value="gpt-4o-mini",
                )
-            # Model selection
+                # Reference CSV upload
-            model_select = gr.Dropdown(
+                reference_input = gr.File(
-                label="OpenAI Model",
+                    label="Reference CSV (optional)", file_types=[".csv"]
-                choices=["gpt-4o-mini", "gpt-4.1-mini"],
+                )
                value="gpt-4o-mini"
            )
-            # Reference CSV upload
+                # Examples
-            reference_input = gr.File(label="Reference CSV (optional)", file_types=[".csv"])
+                gr.Examples(
                    examples=[
                        "data/sentiment_reference.csv",
                        "data/people_reference.csv",
                        "data/wine_reference.csv",
                    ],
                    inputs=reference_input,
                )
-            # Examples
+                # Generate button
-            gr.Examples(
+                generate_btn = gr.Button("🚀 Generate Data")
                examples=["data/sentiment_reference.csv","data/people_reference.csv","data/wine_reference.csv"],
                inputs=reference_input
            )
-            # Generate button
+                # Download button
-            generate_btn = gr.Button("🚀 Generate Data")
+                download_csv = gr.File(label="Download CSV")
-            # Download button
+            # ------------------------------
-            download_csv = gr.File(label="Download CSV")
+            # Tab 2: Reference Table
            # ------------------------------
            with gr.Tab("Reference Table"):
                reference_display = gr.DataFrame(label="Reference CSV Preview")
-        # ------------------------------
+            # ------------------------------
-        # Tab 2: Reference Table
+            # Tab 3: Generated Table
-        # ------------------------------
+            # ------------------------------
-        with gr.Tab("Reference Table"):
+            with gr.Tab("Generated Table"):
-            reference_display = gr.DataFrame(label="Reference CSV Preview")
+                output_df = gr.DataFrame(label="Generated Data")
-        # ------------------------------
+            # ------------------------------
-        # Tab 3: Generated Table
+            # Tab 4: Comparison (evaluation results)
-        # ------------------------------
+            # ------------------------------
-        with gr.Tab("Generated Table"):
+            with gr.Tab("Comparison"):
-            output_df = gr.DataFrame(label="Generated Data")
+                with gr.Accordion("Evaluation Results (click to expand)", open=True):
-            
+                    evaluation_df = gr.DataFrame(label="Evaluation Results")
-        # ------------------------------
+            # ------------------------------
-        # Tab 4: Evaluation
+            # Tab 5: Visualizations
-        # ------------------------------
+            # ------------------------------
        with gr.Tab("Comparison"):
            with gr.Accordion("Evaluation Results (click to expand)", open=True):
                evaluation_df = gr.DataFrame(label="Evaluation Results")
-        # ------------------------------
+            with gr.Tab("Visualizations"):
-        # Tab 5: Visualizations
+                gr.Markdown("# Click on the box to expand")
        # ------------------------------
-        with gr.Tab("Visualizations"):
+                images_gallery = gr.Gallery(
-            gr.Markdown("# Click on the box to expand")
+                    label="Column Visualizations",
-            
+                    show_label=True,
-            images_gallery = gr.Gallery(
+                    columns=2,
-                label="Column Visualizations",
+                    height="auto",
-                show_label=True,
+                    interactive=True,
-                columns=2,
+                )
                height='auto',
                interactive=True
            )
-        # Hidden state for internal use
+            # Hidden state for internal use
-        generated_state = gr.State()
+            generated_state = gr.State()
-    # ======================================================
+        # ======================================================
-    # Event bindings
+        # Event bindings
-    # ======================================================
+        # ======================================================
-    generate_btn.click(
+        generate_btn.click(
-        fn=generate_and_evaluate_data,
+            fn=generate_and_evaluate_data,
-        inputs=[system_prompt_input, user_prompt_input, temp_dir_state, reference_input, model_select],
+            inputs=[
-        outputs=[output_df, download_csv, evaluation_df, generated_state, images_gallery]
+                system_prompt_input,
-    )
+                user_prompt_input,
                temp_dir_state,
                reference_input,
                model_select,
            ],
            outputs=[
                output_df,
                download_csv,
                evaluation_df,
                generated_state,
                images_gallery,
            ],
        )
-    reference_input.change(
+        reference_input.change(
-        fn=display_reference_csv,
+            fn=display_reference_csv,
-        inputs=[reference_input],
+            inputs=[reference_input],
-        outputs=[reference_display]
+            outputs=[reference_display],
-    )
+        )
-demo.launch(debug=True)
+    demo.launch(debug=True)
 if __name__ == "__main__":
    main()