Cleared cell outputs

2025-10-26 22:12:39 +00:00
parent fc1bff1e0e
commit 118954b1df
1 changed files with 229 additions and 0 deletions
--- a/week3/community-contributions/samuel_bootcamp_wk3/data_generator.ipynb
+++ b/week3/community-contributions/samuel_bootcamp_wk3/data_generator.ipynb
@@ -0,0 +1,229 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "2714fa36",
+   "metadata": {},
+   "source": [
+    "## Week 3 Data Generator With Open-Source Models\n",
+    "# Generate synthetic data for pizza customers within Nairobi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "761622db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -q requests pandas ipywidgets gradio huggingface_hub python-dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc7347c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import gradio as gr\n",
+    "from huggingface_hub import InferenceClient\n",
+    "import random\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f20cd822",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the Hugging Face API token from the environment.\n",
+    "# override=True lets values from a local .env file replace variables\n",
+    "# that are already set in the process environment.\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "# HF_API_KEY is None when HF_TOKEN is not defined anywhere.\n",
+    "HF_API_KEY = os.getenv('HF_TOKEN')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "856cd8cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Display name -> Hugging Face model ID.\n",
+    "# The first key is used as the default choice in the model selector below,\n",
+    "# so the ordering of this dict is significant.\n",
+    "# NOTE(review): confirm each ID is actually served for text generation and\n",
+    "# whether it requires an access token -- not verifiable from this notebook.\n",
+    "MODELS = {\n",
+    "    \"Mistral-7B\": \"mistralai/Mistral-7B-Instruct-v0.2\",\n",
+    "    \"Llama-2-7B\": \"meta-llama/Llama-2-7b-chat-hf\",\n",
+    "    \"Phi-2\": \"microsoft/phi-2\",\n",
+    "    \"GPT-2\": \"gpt2\"\n",
+    "}\n",
+    "\n",
+    "# Nairobi branch names; one is picked at random for every generated record.\n",
+    "BRANCHES = [\"Westlands\", \"Karen\", \"Kilimani\", \"CBD\", \"Parklands\"]\n",
+    "\n",
+    "# Most recently generated DataFrame. Written by generate_feedback_data and\n",
+    "# read by the download helpers; stays None until a generation succeeds.\n",
+    "generated_df = None\n",
+    "\n",
+    "def generate_feedback_data(model_name, num_records):\n",
+    "    \"\"\"Generate synthetic pizza feedback records with the selected model.\n",
+    "\n",
+    "    Args:\n",
+    "        model_name: Key into MODELS naming the Hugging Face model to query.\n",
+    "        num_records: How many records to create. Coerced to int because the\n",
+    "            value comes from a UI slider and may arrive as a float.\n",
+    "\n",
+    "    Returns:\n",
+    "        A (DataFrame, status message) tuple. If anything outside the\n",
+    "        per-record model call fails, the DataFrame is empty and the message\n",
+    "        carries the error text. Side effect: on success the module-level\n",
+    "        generated_df is replaced with the new DataFrame.\n",
+    "    \"\"\"\n",
+    "    global generated_df\n",
+    "\n",
+    "    try:\n",
+    "        # range() rejects floats, so normalise the slider value first.\n",
+    "        num_records = int(num_records)\n",
+    "\n",
+    "        # Authenticate with the token loaded from the environment; it may be\n",
+    "        # None, in which case the client falls back to its own defaults.\n",
+    "        model_id = MODELS[model_name]\n",
+    "        client = InferenceClient(model=model_id, token=HF_API_KEY)\n",
+    "\n",
+    "        feedback_data = []\n",
+    "        fallback_count = 0  # records whose text came from templates, not the model\n",
+    "\n",
+    "        for i in range(num_records):\n",
+    "            # Random branch\n",
+    "            branch = random.choice(BRANCHES)\n",
+    "\n",
+    "            # Generate feedback using the AI model\n",
+    "            prompt = f\"Generate a brief customer feedback comment about a pizza order from {branch} branch in Nairobi. Make it realistic and varied (positive, negative, or neutral). Keep it under 30 words.\"\n",
+    "\n",
+    "            try:\n",
+    "                response = client.text_generation(\n",
+    "                    prompt,\n",
+    "                    max_new_tokens=50,\n",
+    "                    temperature=0.8\n",
+    "                )\n",
+    "                feedback = response.strip()\n",
+    "            except Exception:\n",
+    "                # Deliberate best effort: keep producing rows from templates,\n",
+    "                # but count them so the status message stays honest.\n",
+    "                feedback = generate_fallback_feedback(branch)\n",
+    "                fallback_count += 1\n",
+    "\n",
+    "            # Remaining fields are random and independent of the feedback text.\n",
+    "            record = {\n",
+    "                \"Customer_ID\": f\"CUST{1000 + i}\",\n",
+    "                \"Branch\": branch,\n",
+    "                \"Rating\": random.randint(1, 5),\n",
+    "                \"Order_Type\": random.choice([\"Delivery\", \"Dine-in\", \"Takeaway\"]),\n",
+    "                \"Feedback\": feedback,\n",
+    "                # Day is capped at 28 so every month yields a valid date.\n",
+    "                \"Date\": f\"2024-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}\"\n",
+    "            }\n",
+    "\n",
+    "            feedback_data.append(record)\n",
+    "\n",
+    "        # Create DataFrame\n",
+    "        generated_df = pd.DataFrame(feedback_data)\n",
+    "\n",
+    "        status = f\"✓ Successfully generated {num_records} records using {model_name}\"\n",
+    "        if fallback_count:\n",
+    "            status += f\" ({fallback_count} used template fallback because the model call failed)\"\n",
+    "        return generated_df, status\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        return pd.DataFrame(), f\"✗ Error: {str(e)}\"\n",
+    "\n",
+    "def generate_fallback_feedback(branch):\n",
+    "    \"\"\"Return one canned feedback sentence mentioning ``branch``.\n",
+    "\n",
+    "    Serves as the substitute text when the model API call fails. The\n",
+    "    sentence is chosen at random from a fixed set of templates.\n",
+    "    \"\"\"\n",
+    "    canned_comments = (\n",
+    "        f\"Great pizza from {branch}! Quick delivery and hot food.\",\n",
+    "        f\"Pizza was cold when it arrived at {branch}. Disappointed.\",\n",
+    "        f\"Excellent service at {branch} branch. Will order again!\",\n",
+    "        \"Average experience. Pizza was okay but nothing special.\",\n",
+    "        f\"Long wait time at {branch} but the pizza was worth it.\",\n",
+    "    )\n",
+    "    return random.choice(canned_comments)\n",
+    "\n",
+    "def download_csv():\n",
+    "    \"\"\"Write the last generated records to ``pizza_feedback_data.csv``.\n",
+    "\n",
+    "    The path is relative, so the file lands in the current working\n",
+    "    directory. Returns a short status string for display in the UI.\n",
+    "    \"\"\"\n",
+    "    global generated_df\n",
+    "    # Nothing has been generated yet: report instead of writing a file.\n",
+    "    if generated_df is None:\n",
+    "        return \"No data to download\"\n",
+    "    generated_df.to_csv('pizza_feedback_data.csv', index=False)\n",
+    "    return \"CSV downloaded!\"\n",
+    "\n",
+    "def download_json():\n",
+    "    \"\"\"Write the last generated records to ``pizza_feedback_data.json``.\n",
+    "\n",
+    "    Records are serialised as a list of objects (``orient='records'``)\n",
+    "    with two-space indentation. Returns a short status string for the UI.\n",
+    "    \"\"\"\n",
+    "    global generated_df\n",
+    "    # Nothing has been generated yet: report instead of writing a file.\n",
+    "    if generated_df is None:\n",
+    "        return \"No data to download\"\n",
+    "    generated_df.to_json('pizza_feedback_data.json', orient='records', indent=2)\n",
+    "    return \"JSON downloaded!\"\n",
+    "\n",
+    "# Build the Gradio interface: controls on top, then a status line, the\n",
+    "# generated table, and finally the two export buttons.\n",
+    "with gr.Blocks(title=\"Pizza Feedback Data Generator\") as demo:\n",
+    "    gr.Markdown(\"\"\"\n",
+    "    # 🍕 Pizza Feedback Data Generator\n",
+    "    Generate synthetic customer feedback for Nairobi pizza branches using AI models\n",
+    "    \"\"\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        with gr.Column():\n",
+    "            # One radio option per MODELS key; the first key is preselected.\n",
+    "            model_selector = gr.Radio(\n",
+    "                choices=list(MODELS.keys()),\n",
+    "                label=\"Select AI Model\",\n",
+    "                value=list(MODELS.keys())[0]\n",
+    "            )\n",
+    "            \n",
+    "            # Whole-number record count between 1 and 50, defaulting to 10.\n",
+    "            num_records_slider = gr.Slider(\n",
+    "                minimum=1,\n",
+    "                maximum=50,\n",
+    "                value=10,\n",
+    "                step=1,\n",
+    "                label=\"Number of Records\"\n",
+    "            )\n",
+    "            \n",
+    "            generate_btn = gr.Button(\"Generate Feedback Data\", variant=\"primary\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        # Read-only box shared by the generate and download handlers.\n",
+    "        status_output = gr.Textbox(label=\"Status\", interactive=False)\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        # Read-only table showing the DataFrame returned by the generator.\n",
+    "        dataframe_output = gr.Dataframe(\n",
+    "            label=\"Generated Feedback Data\",\n",
+    "            interactive=False\n",
+    "        )\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        csv_btn = gr.Button(\"Download CSV\")\n",
+    "        json_btn = gr.Button(\"Download JSON\")\n",
+    "    \n",
+    "    # Event handlers\n",
+    "    # Generation fills both the table and the status box, in that order,\n",
+    "    # matching the (DataFrame, message) tuple the handler returns.\n",
+    "    generate_btn.click(\n",
+    "        fn=generate_feedback_data,\n",
+    "        inputs=[model_selector, num_records_slider],\n",
+    "        outputs=[dataframe_output, status_output]\n",
+    "    )\n",
+    "    \n",
+    "    # The download handlers take no inputs and only update the status box.\n",
+    "    csv_btn.click(\n",
+    "        fn=download_csv,\n",
+    "        outputs=status_output\n",
+    "    )\n",
+    "    \n",
+    "    json_btn.click(\n",
+    "        fn=download_json,\n",
+    "        outputs=status_output\n",
+    "    )\n",
+    "\n",
+    "# Launch the interface\n",
+    "demo.launch()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}