Update README install/run instructions and wrap app.py startup in main()

2025-10-23 23:40:53 +01:00
parent 101b0baf62
commit 9c5d5fb99e
2 changed files with 139 additions and 105 deletions
--- a/week3/community-contributions/juan_synthetic_data/README.md
+++ b/week3/community-contributions/juan_synthetic_data/README.md
@@ -1,6 +1,8 @@
 # Synthetic Data Generator
 **NOTE:** This is a copy of the repository https://github.com/Jsrodrigue/synthetic-data-creator.

+## Overview
+
 An intelligent synthetic data generator that uses OpenAI models to create realistic tabular datasets based on reference data. This project includes an intuitive web interface built with Gradio.

 > **🎓 Educational Project**: This project was inspired by the highly regarded LLM Engineering course on Udemy: [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099). It demonstrates practical applications of LLM engineering principles, prompt engineering, and synthetic data generation techniques.
@@ -61,7 +63,7 @@ An intelligent synthetic data generator that uses OpenAI models to create realis
 - Python 3.12+
 - OpenAI account with API key

-### Installation with pip
+### Option 1: Using pip
 ```bash
 # Create virtual environment
 python -m venv venv
@@ -71,11 +73,11 @@ source venv/bin/activate  # On Windows: venv\Scripts\activate
 pip install -r requirements.txt
 ```

-### Installation with uv
+### Option 2: Using uv
 ```bash
 # Clone the repository
 git clone https://github.com/Jsrodrigue/synthetic-data-creator.git
-cd synthetic_data
+cd synthetic-data-creator

 # Install dependencies
 uv sync
@@ -100,8 +102,15 @@ OPENAI_API_KEY=your_api_key_here
 ## 🎯 Usage

 ### Start the application
+
+You can run the app either with **Python** or with **uv** (recommended if you installed dependencies using `uv sync`):
+
 ```bash
+# Option 1: using Python
 python app.py
+
+# Option 2: using uv (no need to activate the venv manually)
+uv run app.py
 ```

 The script will print a local URL (e.g., http://localhost:7860) — open that link in your browser.
--- a/week3/community-contributions/juan_synthetic_data/app.py
+++ b/week3/community-contributions/juan_synthetic_data/app.py
@@ -1,131 +1,156 @@
-import gradio as gr
-import os
 import atexit
-from src.IO_utils import cleanup_temp_files
-from src.data_generation import generate_and_evaluate_data
-from src.plot_utils import display_reference_csv
-from dotenv import load_dotenv
+import os
+
+import gradio as gr
 import openai
+from dotenv import load_dotenv
+
 from src.constants import PROJECT_TEMP_DIR, SYSTEM_PROMPT, USER_PROMPT
+from src.data_generation import generate_and_evaluate_data
+from src.IO_utils import cleanup_temp_files
+from src.plot_utils import display_reference_csv

-# ==========================================================
-# Setup
-# ==========================================================

-#Load the api key
-load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
+def main():
+    # ==========================================================
+    # Setup
+    # ==========================================================

-# Temporary folder for images
-os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)
+    # Load the OpenAI API key from the environment (.env file)
+    load_dotenv()
+    openai.api_key = os.getenv("OPENAI_API_KEY")

-# Ensure temporary plot images are deleted when the program exits
-atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))
+    # Temporary folder for images
+    os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)

-# ==========================================================
-# Gradio App
-# ==========================================================
-with gr.Blocks() as demo:
+    # Ensure temporary plot images are deleted when the program exits
+    atexit.register(lambda: cleanup_temp_files(PROJECT_TEMP_DIR))

-    # Store temp folder in state
-    temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)
+    # ==========================================================
+    # Gradio App
+    # ==========================================================
+    with gr.Blocks() as demo:

-    gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")
+        # Store temp folder in state
+        temp_dir_state = gr.State(value=PROJECT_TEMP_DIR)

-    # ======================================================
-    # Tabs for organized sections
-    # ======================================================
-    with gr.Tabs():
+        gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")

-        # ------------------------------
-        # Tab 1: Input
-        # ------------------------------
-        with gr.Tab("Input"):
+        # ======================================================
+        # Tabs for organized sections
+        # ======================================================
+        with gr.Tabs():

-            # System prompt in collapsible
-            with gr.Accordion("System Prompt (click to expand)", open=False):
-                system_prompt_input = gr.Textbox(
-                    label="System Prompt",
-                    value=SYSTEM_PROMPT,
-                    lines=20
+            # ------------------------------
+            # Tab 1: Input
+            # ------------------------------
+            with gr.Tab("Input"):
+
+                # System prompt in collapsible
+                with gr.Accordion("System Prompt (click to expand)", open=False):
+                    system_prompt_input = gr.Textbox(
+                        label="System Prompt", value=SYSTEM_PROMPT, lines=20
+                    )
+
+                # User prompt box
+                user_prompt_input = gr.Textbox(
+                    label="User Prompt", value=USER_PROMPT, lines=5
                )

-            # User prompt box
-            user_prompt_input = gr.Textbox(label="User Prompt", value=USER_PROMPT, lines=5)
+                # Model selection
+                model_select = gr.Dropdown(
+                    label="OpenAI Model",
+                    choices=["gpt-4o-mini", "gpt-4.1-mini"],
+                    value="gpt-4o-mini",
+                )

-            # Model selection
-            model_select = gr.Dropdown(
-                label="OpenAI Model",
-                choices=["gpt-4o-mini", "gpt-4.1-mini"],
-                value="gpt-4o-mini"
-            )
+                # Reference CSV upload
+                reference_input = gr.File(
+                    label="Reference CSV (optional)", file_types=[".csv"]
+                )

-            # Reference CSV upload
-            reference_input = gr.File(label="Reference CSV (optional)", file_types=[".csv"])
+                # Examples
+                gr.Examples(
+                    examples=[
+                        "data/sentiment_reference.csv",
+                        "data/people_reference.csv",
+                        "data/wine_reference.csv",
+                    ],
+                    inputs=reference_input,
+                )

-            # Examples
-            gr.Examples(
-                examples=["data/sentiment_reference.csv","data/people_reference.csv","data/wine_reference.csv"],
-                inputs=reference_input
-            )
+                # Generate button
+                generate_btn = gr.Button("🚀 Generate Data")

-            # Generate button
-            generate_btn = gr.Button("🚀 Generate Data")
+                # Download button
+                download_csv = gr.File(label="Download CSV")

-            # Download button
-            download_csv = gr.File(label="Download CSV")
+            # ------------------------------
+            # Tab 2: Reference Table
+            # ------------------------------
+            with gr.Tab("Reference Table"):
+                reference_display = gr.DataFrame(label="Reference CSV Preview")

-        # ------------------------------
-        # Tab 2: Reference Table
-        # ------------------------------
-        with gr.Tab("Reference Table"):
-            reference_display = gr.DataFrame(label="Reference CSV Preview")
+            # ------------------------------
+            # Tab 3: Generated Table
+            # ------------------------------
+            with gr.Tab("Generated Table"):
+                output_df = gr.DataFrame(label="Generated Data")

-        # ------------------------------
-        # Tab 3: Generated Table
-        # ------------------------------
-        with gr.Tab("Generated Table"):
-            output_df = gr.DataFrame(label="Generated Data")
-            
+            # ------------------------------
+            # Tab 4: Comparison (evaluation results)
+            # ------------------------------
+            with gr.Tab("Comparison"):
+                with gr.Accordion("Evaluation Results (click to expand)", open=True):
+                    evaluation_df = gr.DataFrame(label="Evaluation Results")

-        # ------------------------------
-        # Tab 4: Evaluation
-        # ------------------------------
-        with gr.Tab("Comparison"):
-            with gr.Accordion("Evaluation Results (click to expand)", open=True):
-                evaluation_df = gr.DataFrame(label="Evaluation Results")
+            # ------------------------------
+            # Tab 5: Visualizations
+            # ------------------------------

-        # ------------------------------
-        # Tab 5: Visualizations
-        # ------------------------------
+            with gr.Tab("Visualizations"):
+                gr.Markdown("# Click on the box to expand")

-        with gr.Tab("Visualizations"):
-            gr.Markdown("# Click on the box to expand")
-            
-            images_gallery = gr.Gallery(
-                label="Column Visualizations",
-                show_label=True,
-                columns=2,
-                height='auto',
-                interactive=True
-            )
+                images_gallery = gr.Gallery(
+                    label="Column Visualizations",
+                    show_label=True,
+                    columns=2,
+                    height="auto",
+                    interactive=True,
+                )

-        # Hidden state for internal use
-        generated_state = gr.State()
+            # Hidden state for internal use
+            generated_state = gr.State()

-    # ======================================================
-    # Event bindings
-    # ======================================================
-    generate_btn.click(
-        fn=generate_and_evaluate_data,
-        inputs=[system_prompt_input, user_prompt_input, temp_dir_state, reference_input, model_select],
-        outputs=[output_df, download_csv, evaluation_df, generated_state, images_gallery]
-    )
+        # ======================================================
+        # Event bindings
+        # ======================================================
+        generate_btn.click(
+            fn=generate_and_evaluate_data,
+            inputs=[
+                system_prompt_input,
+                user_prompt_input,
+                temp_dir_state,
+                reference_input,
+                model_select,
+            ],
+            outputs=[
+                output_df,
+                download_csv,
+                evaluation_df,
+                generated_state,
+                images_gallery,
+            ],
+        )

-    reference_input.change(
-        fn=display_reference_csv,
-        inputs=[reference_input],
-        outputs=[reference_display]
-    )
+        reference_input.change(
+            fn=display_reference_csv,
+            inputs=[reference_input],
+            outputs=[reference_display],
+        )

-demo.launch(debug=True)
+    demo.launch(debug=True)
+
+
+if __name__ == "__main__":
+    main()