From 0ceb9136d5a2248495e9ecf1619cb9613da0e9b2 Mon Sep 17 00:00:00 2001
From: The Top Dev <oluoch.joshua@gmail.com>
Date: Thu, 23 Oct 2025 08:48:44 +0300
Subject: [PATCH] Cleared output for the Synthetic survey data generator

---
 ...3_Exercise_survey_Dataset_Generation.ipynb | 1037 +----------------
 1 file changed, 23 insertions(+), 1014 deletions(-)
diff --git a/week3/community-contributions/week3_Exercise_survey_Dataset_Generation.ipynb b/week3/community-contributions/week3_Exercise_survey_Dataset_Generation.ipynb
index a11fa96..a4474af 100644
--- a/week3/community-contributions/week3_Exercise_survey_Dataset_Generation.ipynb
+++ b/week3/community-contributions/week3_Exercise_survey_Dataset_Generation.ipynb
@@ -10,18 +10,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": null,
    "id": "8d86f629",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "✅ Base libraries ready. Pandera available: True\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "import os, re, json, time, uuid, math, random\n",
@@ -35,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "id": "f196ae73",
    "metadata": {},
    "outputs": [],
@@ -94,18 +86,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "id": "d16bd03a",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loaded config for 800 rows and 18 fields.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "CFG = {\n",
@@ -145,7 +129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": null,
    "id": "d2f5fdff",
    "metadata": {},
    "outputs": [],
@@ -208,196 +192,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": null,
    "id": "cd61330d",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>response_id</th>\n",
-       "      <th>respondent_id</th>\n",
-       "      <th>submitted_at</th>\n",
-       "      <th>country</th>\n",
-       "      <th>language</th>\n",
-       "      <th>device</th>\n",
-       "      <th>age</th>\n",
-       "      <th>gender</th>\n",
-       "      <th>education</th>\n",
-       "      <th>income_band</th>\n",
-       "      <th>completion_seconds</th>\n",
-       "      <th>attention_passed</th>\n",
-       "      <th>q_quality</th>\n",
-       "      <th>q_value</th>\n",
-       "      <th>q_ease</th>\n",
-       "      <th>q_support</th>\n",
-       "      <th>nps</th>\n",
-       "      <th>is_detractor</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>f099c1b6-a4ae-4fb0-ba98-89a81008c424</td>\n",
-       "      <td>71615</td>\n",
-       "      <td>2024-04-13 19:02:44</td>\n",
-       "      <td>ZA</td>\n",
-       "      <td>en</td>\n",
-       "      <td>web</td>\n",
-       "      <td>47</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>low</td>\n",
-       "      <td>897.995012</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>True</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>f2e20ad1-1ed1-4e33-8beb-5dd0ba23715b</td>\n",
-       "      <td>68564</td>\n",
-       "      <td>2024-03-05 23:30:30</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>android</td>\n",
-       "      <td>67</td>\n",
-       "      <td>female</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>lower_mid</td>\n",
-       "      <td>935.607966</td>\n",
-       "      <td>True</td>\n",
-       "      <td>1</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>a9345f69-be75-46b9-8cd3-a276ce0a66bd</td>\n",
-       "      <td>59689</td>\n",
-       "      <td>2024-11-10 03:38:07</td>\n",
-       "      <td>RW</td>\n",
-       "      <td>sw</td>\n",
-       "      <td>android</td>\n",
-       "      <td>23</td>\n",
-       "      <td>male</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>low</td>\n",
-       "      <td>1431.517701</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>7</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>b4fa8625-d153-4465-ad73-1c4a48eed2f1</td>\n",
-       "      <td>20742</td>\n",
-       "      <td>2024-11-19 17:40:58</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>68</td>\n",
-       "      <td>female</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>upper_mid</td>\n",
-       "      <td>448.519416</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>3</td>\n",
-       "      <td>10</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>e0ad4bbc-b576-4913-8786-302f06b5e9f7</td>\n",
-       "      <td>63459</td>\n",
-       "      <td>2024-07-28 04:23:37</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>34</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>low</td>\n",
-       "      <td>1179.970734</td>\n",
-       "      <td>True</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                            response_id  respondent_id         submitted_at  \\\n",
-       "0  f099c1b6-a4ae-4fb0-ba98-89a81008c424          71615  2024-04-13 19:02:44   \n",
-       "1  f2e20ad1-1ed1-4e33-8beb-5dd0ba23715b          68564  2024-03-05 23:30:30   \n",
-       "2  a9345f69-be75-46b9-8cd3-a276ce0a66bd          59689  2024-11-10 03:38:07   \n",
-       "3  b4fa8625-d153-4465-ad73-1c4a48eed2f1          20742  2024-11-19 17:40:58   \n",
-       "4  e0ad4bbc-b576-4913-8786-302f06b5e9f7          63459  2024-07-28 04:23:37   \n",
-       "\n",
-       "  country language   device  age  gender  education income_band  \\\n",
-       "0      ZA       en      web   47    male  secondary         low   \n",
-       "1      KE       en  android   67  female   bachelor   lower_mid   \n",
-       "2      RW       sw  android   23    male   bachelor         low   \n",
-       "3      KE       en      ios   68  female  secondary   upper_mid   \n",
-       "4      KE       en      ios   34    male  secondary         low   \n",
-       "\n",
-       "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-       "0          897.995012              True          5        3       1   \n",
-       "1          935.607966              True          1        5       2   \n",
-       "2         1431.517701              True          5        2       5   \n",
-       "3          448.519416              True          5        5       5   \n",
-       "4         1179.970734              True          3        1       3   \n",
-       "\n",
-       "   q_support  nps  is_detractor  \n",
-       "0          3    4          True  \n",
-       "1          3    5         False  \n",
-       "2          5    7         False  \n",
-       "3          3   10         False  \n",
-       "4          3    5         False  "
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "def generate_rule_based(CFG: Dict[str, Any]) -> pd.DataFrame:\n",
@@ -450,245 +248,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": null,
    "id": "9a4ef86a",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Validation error: {\n",
-      "    \"SCHEMA\": {\n",
-      "        \"WRONG_DATATYPE\": [\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"respondent_id\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'respondent_id' to have type int64, got int32\"\n",
-      "            },\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"age\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'age' to have type int64, got int32\"\n",
-      "            },\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"q_quality\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'q_quality' to have type int64, got int32\"\n",
-      "            },\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"q_value\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'q_value' to have type int64, got int32\"\n",
-      "            },\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"q_ease\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'q_ease' to have type int64, got int32\"\n",
-      "            },\n",
-      "            {\n",
-      "                \"schema\": null,\n",
-      "                \"column\": \"q_support\",\n",
-      "                \"check\": \"dtype('int64')\",\n",
-      "                \"error\": \"expected series 'q_support' to have type int64, got int32\"\n",
-      "            }\n",
-      "        ]\n",
-      "    }\n",
-      "}\n",
-      "{'engine': 'pandera', 'valid_rows': 800, 'invalid_rows': 0, 'notes': 'Non-strict mode.'}\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>response_id</th>\n",
-       "      <th>respondent_id</th>\n",
-       "      <th>submitted_at</th>\n",
-       "      <th>country</th>\n",
-       "      <th>language</th>\n",
-       "      <th>device</th>\n",
-       "      <th>age</th>\n",
-       "      <th>gender</th>\n",
-       "      <th>education</th>\n",
-       "      <th>income_band</th>\n",
-       "      <th>completion_seconds</th>\n",
-       "      <th>attention_passed</th>\n",
-       "      <th>q_quality</th>\n",
-       "      <th>q_value</th>\n",
-       "      <th>q_ease</th>\n",
-       "      <th>q_support</th>\n",
-       "      <th>nps</th>\n",
-       "      <th>is_detractor</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>f099c1b6-a4ae-4fb0-ba98-89a81008c424</td>\n",
-       "      <td>71615</td>\n",
-       "      <td>2024-04-13 19:02:44</td>\n",
-       "      <td>ZA</td>\n",
-       "      <td>en</td>\n",
-       "      <td>web</td>\n",
-       "      <td>47</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>low</td>\n",
-       "      <td>897.995012</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>True</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>f2e20ad1-1ed1-4e33-8beb-5dd0ba23715b</td>\n",
-       "      <td>68564</td>\n",
-       "      <td>2024-03-05 23:30:30</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>android</td>\n",
-       "      <td>67</td>\n",
-       "      <td>female</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>lower_mid</td>\n",
-       "      <td>935.607966</td>\n",
-       "      <td>True</td>\n",
-       "      <td>1</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>a9345f69-be75-46b9-8cd3-a276ce0a66bd</td>\n",
-       "      <td>59689</td>\n",
-       "      <td>2024-11-10 03:38:07</td>\n",
-       "      <td>RW</td>\n",
-       "      <td>sw</td>\n",
-       "      <td>android</td>\n",
-       "      <td>23</td>\n",
-       "      <td>male</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>low</td>\n",
-       "      <td>1431.517701</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>7</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>b4fa8625-d153-4465-ad73-1c4a48eed2f1</td>\n",
-       "      <td>20742</td>\n",
-       "      <td>2024-11-19 17:40:58</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>68</td>\n",
-       "      <td>female</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>upper_mid</td>\n",
-       "      <td>448.519416</td>\n",
-       "      <td>True</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>3</td>\n",
-       "      <td>10</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>e0ad4bbc-b576-4913-8786-302f06b5e9f7</td>\n",
-       "      <td>63459</td>\n",
-       "      <td>2024-07-28 04:23:37</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>34</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>low</td>\n",
-       "      <td>1179.970734</td>\n",
-       "      <td>True</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                            response_id  respondent_id         submitted_at  \\\n",
-       "0  f099c1b6-a4ae-4fb0-ba98-89a81008c424          71615  2024-04-13 19:02:44   \n",
-       "1  f2e20ad1-1ed1-4e33-8beb-5dd0ba23715b          68564  2024-03-05 23:30:30   \n",
-       "2  a9345f69-be75-46b9-8cd3-a276ce0a66bd          59689  2024-11-10 03:38:07   \n",
-       "3  b4fa8625-d153-4465-ad73-1c4a48eed2f1          20742  2024-11-19 17:40:58   \n",
-       "4  e0ad4bbc-b576-4913-8786-302f06b5e9f7          63459  2024-07-28 04:23:37   \n",
-       "\n",
-       "  country language   device  age  gender  education income_band  \\\n",
-       "0      ZA       en      web   47    male  secondary         low   \n",
-       "1      KE       en  android   67  female   bachelor   lower_mid   \n",
-       "2      RW       sw  android   23    male   bachelor         low   \n",
-       "3      KE       en      ios   68  female  secondary   upper_mid   \n",
-       "4      KE       en      ios   34    male  secondary         low   \n",
-       "\n",
-       "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-       "0          897.995012              True          5        3       1   \n",
-       "1          935.607966              True          1        5       2   \n",
-       "2         1431.517701              True          5        2       5   \n",
-       "3          448.519416              True          5        5       5   \n",
-       "4         1179.970734              True          3        1       3   \n",
-       "\n",
-       "   q_support  nps  is_detractor  \n",
-       "0          3    4          True  \n",
-       "1          3    5         False  \n",
-       "2          5    7         False  \n",
-       "3          3   10         False  \n",
-       "4          3    5         False  "
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "def build_pandera_schema(CFG):\n",
@@ -732,26 +295,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": null,
    "id": "73626b4c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Saved: data/survey_rule_20251023T004106Z.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joshua\\AppData\\Local\\Temp\\ipykernel_27572\\1233117399.py:3: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n",
-      "  ts = datetime.utcnow().strftime(\"%Y%m%dT%H%M%SZ\")\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "from pathlib import Path\n",
@@ -772,7 +319,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": null,
    "id": "24e94771",
    "metadata": {},
    "outputs": [],
@@ -1067,73 +614,7 @@
    "execution_count": null,
    "id": "e1af410e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🧪 Testing LLM generation...\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5236 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📊 Generated dataset shape: (10, 18)\n",
-      "\n",
-      "📋 First few rows:\n",
-      "                            response_id  respondent_id         submitted_at  \\\n",
-      "0  f3e9b9d1-4e9e-4f8a-9b5c-7e3cbb1c4e5e          10234  2023-10-01 14:23:45   \n",
-      "1  a1c5f6d3-1f5b-4e8a-8c7a-5e2c3f4b8e1b          20456  2023-10-01 15:10:12   \n",
-      "2  c2b3e4f5-5d6e-4b8a-9f3c-8e1a2f9b4e3c          30567  2023-10-01 16:45:30   \n",
-      "3  d4e5f6b7-6e8f-4b9a-8c7d-9e2f3c4b5e6f          40678  2023-10-01 17:30:00   \n",
-      "4  e5f6a7b8-7f9a-4c0a-9e2f-1e3c4b5e6f7a          50789  2023-10-01 18:15:15   \n",
-      "\n",
-      "  country language   device  age     gender     education income_band  \\\n",
-      "0      KE       en  android   29     female      bachelor   upper_mid   \n",
-      "1      UG       sw      web   34       male     secondary   lower_mid   \n",
-      "2      TZ       en      ios   42  nonbinary       diploma        high   \n",
-      "3      RW       sw  android   27     female      bachelor   upper_mid   \n",
-      "4      NG       en      web   36       male  postgraduate        high   \n",
-      "\n",
-      "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-      "0               450.0              True          4        5       4   \n",
-      "1               600.5              True          3        4       3   \n",
-      "2               720.0              True          5        5       5   \n",
-      "3               390.0              True          4        4       4   \n",
-      "4               800.0              True          5        5       5   \n",
-      "\n",
-      "   q_support  nps  is_detractor  \n",
-      "0          5    9         False  \n",
-      "1          4    7         False  \n",
-      "2          5   10         False  \n",
-      "3          4    8         False  \n",
-      "4          5    9         False  \n",
-      "\n",
-      "📈 Data types:\n",
-      "response_id            object\n",
-      "respondent_id           int64\n",
-      "submitted_at           object\n",
-      "country                object\n",
-      "language               object\n",
-      "device                 object\n",
-      "age                     int64\n",
-      "gender                 object\n",
-      "education              object\n",
-      "income_band            object\n",
-      "completion_seconds    float64\n",
-      "attention_passed         bool\n",
-      "q_quality               int64\n",
-      "q_value                 int64\n",
-      "q_ease                  int64\n",
-      "q_support               int64\n",
-      "nps                     int64\n",
-      "is_detractor             bool\n",
-      "dtype: object\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Test the fixed LLM generation\n",
     "print(\"🧪 Testing LLM generation...\")\n",
@@ -1196,79 +677,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": null,
    "id": "75c90739",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🧪 Testing the fixed LLM generation...\n",
-      "🔄 Generating 5 survey responses with LLM...\n",
-      "📊 Using max_tokens: 2000 (estimated: 2000)\n",
-      "📝 Raw response length: 2629 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 5 rows\n",
-      "✅ Successfully generated 5 survey responses\n",
-      "\n",
-      "📊 Generated dataset shape: (5, 18)\n",
-      "\n",
-      "📋 First few rows:\n",
-      "                            response_id  respondent_id         submitted_at  \\\n",
-      "0  d8b1c6f3-6f7a-4b4f-9c5f-3a5f8b6e2f1e          12345  2023-10-01 14:30:00   \n",
-      "1  f3a8e3c1-9b4e-4e5e-9c2b-8f5e3c9b1f3d          67890  2023-10-01 15:00:00   \n",
-      "2  c9c8e3f1-2b4f-4a6c-8c2e-2a5f3c8e1f2b          54321  2023-10-01 16:15:00   \n",
-      "3  a5b3c6d2-1e4f-4c5e-9a1f-1f6a7b8e3c9f          98765  2023-10-01 17:45:00   \n",
-      "4  b8f4c3e2-2e4f-4c5e-8a2f-4c5e3b8e2f1a          13579  2023-10-01 18:30:00   \n",
-      "\n",
-      "  country language   device  age     gender     education income_band  \\\n",
-      "0      KE       en  android   29     female      bachelor   upper_mid   \n",
-      "1      UG       sw      web   34       male       diploma   lower_mid   \n",
-      "2      TZ       en      ios   42  nonbinary  postgraduate        high   \n",
-      "3      RW       sw  android   27     female     secondary         low   \n",
-      "4      NG       en      web   55       male      bachelor   upper_mid   \n",
-      "\n",
-      "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-      "0               420.0              True          5        4       4   \n",
-      "1               600.0              True          3        3       2   \n",
-      "2               300.5              True          4        5       4   \n",
-      "3               720.0             False          2        3       3   \n",
-      "4               540.0              True          5        5       5   \n",
-      "\n",
-      "   q_support  nps  is_detractor  \n",
-      "0          5    9         False  \n",
-      "1          4    5         False  \n",
-      "2          5   10         False  \n",
-      "3          2    3          True  \n",
-      "4          5    8         False  \n",
-      "\n",
-      "📈 Data types:\n",
-      "response_id            object\n",
-      "respondent_id           int64\n",
-      "submitted_at           object\n",
-      "country                object\n",
-      "language               object\n",
-      "device                 object\n",
-      "age                     int64\n",
-      "gender                 object\n",
-      "education              object\n",
-      "income_band            object\n",
-      "completion_seconds    float64\n",
-      "attention_passed         bool\n",
-      "q_quality               int64\n",
-      "q_value                 int64\n",
-      "q_ease                  int64\n",
-      "q_support               int64\n",
-      "nps                     int64\n",
-      "is_detractor             bool\n",
-      "dtype: object\n",
-      "\n",
-      "✅ SUCCESS! LLM generation is now working!\n",
-      "📊 Generated 5 survey responses using LLM\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Test the fixed implementation\n",
     "print(\"🧪 Testing the fixed LLM generation...\")\n",
@@ -1290,133 +702,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": null,
    "id": "dd83b842",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🚀 Testing larger dataset generation...\n",
-      "🚀 Generating 100 survey responses with adaptive batching\n",
-      "📊 Using optimal batch size: 10\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 100)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5238 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 90)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5235 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 80)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5232 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 70)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5239 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 60)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5238 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 50)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5236 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 40)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5229 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 30)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5244 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 20)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5234 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "\n",
-      "📦 Processing batch: 10 rows (remaining: 10)\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5238 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "✅ Generated total: 100 survey responses\n",
-      "\n",
-      "📊 Large dataset shape: (100, 18)\n",
-      "\n",
-      "📈 Summary statistics:\n",
-      "       respondent_id         age  completion_seconds   q_quality     q_value  \\\n",
-      "count     100.000000  100.000000          100.000000  100.000000  100.000000   \n",
-      "mean    33513.700000   34.070000          588.525000    3.740000    3.910000   \n",
-      "std     29233.800863    7.835757          230.530212    1.001211    0.995901   \n",
-      "min     10001.000000   22.000000          120.500000    2.000000    2.000000   \n",
-      "25%     10009.000000   28.000000          420.375000    3.000000    3.000000   \n",
-      "50%     15122.500000   33.000000          600.000000    4.000000    4.000000   \n",
-      "75%     55955.750000   39.250000          720.000000    5.000000    5.000000   \n",
-      "max     98765.000000   50.000000         1500.000000    5.000000    5.000000   \n",
-      "\n",
-      "           q_ease   q_support         nps  \n",
-      "count  100.000000  100.000000  100.000000  \n",
-      "mean     3.900000    3.910000    6.990000  \n",
-      "std      0.937437    0.985706    2.333312  \n",
-      "min      2.000000    2.000000    2.000000  \n",
-      "25%      3.000000    3.000000    5.000000  \n",
-      "50%      4.000000    4.000000    7.000000  \n",
-      "75%      5.000000    5.000000    9.000000  \n",
-      "max      5.000000    5.000000   10.000000  \n",
-      "💾 Saved: data\\survey_llm_fixed_20251023T005139Z.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joshua\\AppData\\Local\\Temp\\ipykernel_27572\\2716383900.py:12: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n",
-      "  ts = datetime.utcnow().strftime(\"%Y%m%dT%H%M%SZ\")\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#Test larger dataset generation \n",
     "print(\"🚀 Testing larger dataset generation...\")\n",
@@ -1440,15 +729,7 @@
    "execution_count": null,
    "id": "6029d3e2",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLM available: True\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "def build_json_schema(CFG):\n",
@@ -1555,203 +836,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "2e759087",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLM error, fallback to rule-based mock: No JSON array found in model output.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>response_id</th>\n",
-       "      <th>respondent_id</th>\n",
-       "      <th>submitted_at</th>\n",
-       "      <th>country</th>\n",
-       "      <th>language</th>\n",
-       "      <th>device</th>\n",
-       "      <th>age</th>\n",
-       "      <th>gender</th>\n",
-       "      <th>education</th>\n",
-       "      <th>income_band</th>\n",
-       "      <th>completion_seconds</th>\n",
-       "      <th>attention_passed</th>\n",
-       "      <th>q_quality</th>\n",
-       "      <th>q_value</th>\n",
-       "      <th>q_ease</th>\n",
-       "      <th>q_support</th>\n",
-       "      <th>nps</th>\n",
-       "      <th>is_detractor</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>9e7811bd-27ee-4b7c-9b7a-c98441e337f0</td>\n",
-       "      <td>40160</td>\n",
-       "      <td>2024-08-18 19:10:06</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>sw</td>\n",
-       "      <td>web</td>\n",
-       "      <td>28</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>lower_mid</td>\n",
-       "      <td>1800.000000</td>\n",
-       "      <td>True</td>\n",
-       "      <td>4</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>True</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>85ec8b90-5468-4880-8309-e325da14d877</td>\n",
-       "      <td>55381</td>\n",
-       "      <td>2025-01-24 12:21:13</td>\n",
-       "      <td>TZ</td>\n",
-       "      <td>sw</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>23</td>\n",
-       "      <td>female</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>high</td>\n",
-       "      <td>431.412783</td>\n",
-       "      <td>True</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>4</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>498dff10-040f-4206-8170-dfce0d5a69f0</td>\n",
-       "      <td>48338</td>\n",
-       "      <td>2025-07-15 22:21:54</td>\n",
-       "      <td>TZ</td>\n",
-       "      <td>en</td>\n",
-       "      <td>ios</td>\n",
-       "      <td>49</td>\n",
-       "      <td>male</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>low</td>\n",
-       "      <td>1800.000000</td>\n",
-       "      <td>True</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>ddf11d94-5d6e-4322-9811-4e763f5ed46b</td>\n",
-       "      <td>59925</td>\n",
-       "      <td>2025-01-27 00:16:57</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>web</td>\n",
-       "      <td>22</td>\n",
-       "      <td>male</td>\n",
-       "      <td>bachelor</td>\n",
-       "      <td>upper_mid</td>\n",
-       "      <td>656.050991</td>\n",
-       "      <td>True</td>\n",
-       "      <td>4</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>2ef22a0c-fd13-4798-9276-f43831b8f7bc</td>\n",
-       "      <td>68993</td>\n",
-       "      <td>2024-08-19 04:21:49</td>\n",
-       "      <td>KE</td>\n",
-       "      <td>en</td>\n",
-       "      <td>android</td>\n",
-       "      <td>40</td>\n",
-       "      <td>male</td>\n",
-       "      <td>secondary</td>\n",
-       "      <td>lower_mid</td>\n",
-       "      <td>1553.938944</td>\n",
-       "      <td>True</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>5</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                            response_id  respondent_id         submitted_at  \\\n",
-       "0  9e7811bd-27ee-4b7c-9b7a-c98441e337f0          40160  2024-08-18 19:10:06   \n",
-       "1  85ec8b90-5468-4880-8309-e325da14d877          55381  2025-01-24 12:21:13   \n",
-       "2  498dff10-040f-4206-8170-dfce0d5a69f0          48338  2025-07-15 22:21:54   \n",
-       "3  ddf11d94-5d6e-4322-9811-4e763f5ed46b          59925  2025-01-27 00:16:57   \n",
-       "4  2ef22a0c-fd13-4798-9276-f43831b8f7bc          68993  2024-08-19 04:21:49   \n",
-       "\n",
-       "  country language   device  age  gender  education income_band  \\\n",
-       "0      KE       sw      web   28    male  secondary   lower_mid   \n",
-       "1      TZ       sw      ios   23  female   bachelor        high   \n",
-       "2      TZ       en      ios   49    male   bachelor         low   \n",
-       "3      KE       en      web   22    male   bachelor   upper_mid   \n",
-       "4      KE       en  android   40    male  secondary   lower_mid   \n",
-       "\n",
-       "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-       "0         1800.000000              True          4        3       3   \n",
-       "1          431.412783              True          3        2       3   \n",
-       "2         1800.000000              True          2        3       3   \n",
-       "3          656.050991              True          4        4       1   \n",
-       "4         1553.938944              True          2        2       5   \n",
-       "\n",
-       "   q_support  nps  is_detractor  \n",
-       "0          3    4          True  \n",
-       "1          4    4         False  \n",
-       "2          1    3         False  \n",
-       "3          3    5         False  \n",
-       "4          1    5         False  "
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df_llm = generate_llm(CFG, total_rows=100, batch_size=50)\n",
     "df_llm.head()"
@@ -1759,89 +847,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": null,
    "id": "6d4908ad",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🧪 Testing improved LLM generation with adaptive batching...\n",
-      "\n",
-      "📦 Testing small batch (10 rows)...\n",
-      "🔄 Generating 10 survey responses with LLM...\n",
-      "📊 Using max_tokens: 3500 (estimated: 3500)\n",
-      "📝 Raw response length: 5233 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 10 rows\n",
-      "✅ Successfully generated 10 survey responses\n",
-      "✅ Small batch result: 10 rows\n",
-      "\n",
-      "📦 Testing medium dataset (30 rows) with adaptive batching...\n",
-      "🚀 Generating 30 survey responses with adaptive batching\n",
-      "📊 Using optimal batch size: 15\n",
-      "\n",
-      "📦 Processing batch: 15 rows (remaining: 30)\n",
-      "🔄 Generating 15 survey responses with LLM...\n",
-      "📊 Using max_tokens: 5000 (estimated: 5000)\n",
-      "📝 Raw response length: 7839 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 15 rows\n",
-      "✅ Successfully generated 15 survey responses\n",
-      "\n",
-      "📦 Processing batch: 15 rows (remaining: 15)\n",
-      "🔄 Generating 15 survey responses with LLM...\n",
-      "📊 Using max_tokens: 5000 (estimated: 5000)\n",
-      "📝 Raw response length: 7841 characters\n",
-      "🔍 Parsed JSON type: <class 'dict'>\n",
-      "📊 Found data in 'responses': 15 rows\n",
-      "✅ Successfully generated 15 survey responses\n",
-      "✅ Generated total: 30 survey responses\n",
-      "✅ Medium dataset result: 30 rows\n",
-      "\n",
-      "📊 Dataset shape: (30, 18)\n",
-      "\n",
-      "📋 First few rows:\n",
-      "                            response_id  respondent_id         submitted_at  \\\n",
-      "0  d1e5c4a3-4b1f-4f6b-8f9e-9f1e1f2e3d4c          10001  2023-10-01 14:30:00   \n",
-      "1  c2b1d4a6-7f8e-4c5c-9d8f-1e2c3b4a5e6f          10002  2023-10-01 15:00:00   \n",
-      "2  e3f2c5b7-8a2d-4c8e-9f1b-2c3d4e5f6a7b          10003  2023-10-01 15:30:00   \n",
-      "3  f4a5b6c8-9d3e-4b1f-9f2c-3d4e5f6a7b8c          10004  2023-10-01 16:00:00   \n",
-      "4  g5b6c7d9-0e4f-4b2a-8f3d-4e5f6a7b8c9d          10005  2023-10-01 16:30:00   \n",
-      "\n",
-      "  country language   device  age     gender     education income_band  \\\n",
-      "0      KE       en  android   28     female      bachelor   upper_mid   \n",
-      "1      UG       sw      web   35       male       diploma   lower_mid   \n",
-      "2      TZ       en      ios   42  nonbinary  postgraduate        high   \n",
-      "3      RW       sw      web   29     female     secondary   upper_mid   \n",
-      "4      NG       en  android   50       male      bachelor        high   \n",
-      "\n",
-      "   completion_seconds  attention_passed  q_quality  q_value  q_ease  \\\n",
-      "0               450.0              True          5        4       5   \n",
-      "1               600.0              True          3        2       4   \n",
-      "2               720.0              True          4        5       4   \n",
-      "3               300.0              True          3        3       3   \n",
-      "4               540.0              True          5        5       5   \n",
-      "\n",
-      "   q_support  nps  is_detractor  \n",
-      "0          4    9         False  \n",
-      "1          3    5         False  \n",
-      "2          5   10         False  \n",
-      "3          4    6         False  \n",
-      "4          5   10         False  \n",
-      "💾 Saved: data\\survey_adaptive_batch_20251023T005927Z.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Joshua\\AppData\\Local\\Temp\\ipykernel_27572\\1770033334.py:22: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n",
-      "  ts = datetime.utcnow().strftime(\"%Y%m%dT%H%M%SZ\")\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Test the improved LLM generation with adaptive batching\n",
     "print(\"🧪 Testing improved LLM generation with adaptive batching...\")\n",

	response_id	respondent_id	submitted_at	country	language	device	age	gender	education	income_band	completion_seconds	attention_passed	q_quality	q_value	q_ease	q_support	nps	is_detractor
0	f099c1b6-a4ae-4fb0-ba98-89a81008c424	71615	2024-04-13 19:02:44	ZA	en	web	47	male	secondary	low	897.995012	True	5	3	1	3	4	True
1	f2e20ad1-1ed1-4e33-8beb-5dd0ba23715b	68564	2024-03-05 23:30:30	KE	en	android	67	female	bachelor	lower_mid	935.607966	True	1	5	2	3	5	False
2	a9345f69-be75-46b9-8cd3-a276ce0a66bd	59689	2024-11-10 03:38:07	RW	sw	android	23	male	bachelor	low	1431.517701	True	5	2	5	5	7	False
3	b4fa8625-d153-4465-ad73-1c4a48eed2f1	20742	2024-11-19 17:40:58	KE	en	ios	68	female	secondary	upper_mid	448.519416	True	5	5	5	3	10	False
4	e0ad4bbc-b576-4913-8786-302f06b5e9f7	63459	2024-07-28 04:23:37	KE	en	ios	34	male	secondary	low	1179.970734	True	3	1	3	3	5	False
	response_id	respondent_id	submitted_at	country	language	device	age	gender	education	income_band	completion_seconds	attention_passed	q_quality	q_value	q_ease	q_support	nps	is_detractor
0	9e7811bd-27ee-4b7c-9b7a-c98441e337f0	40160	2024-08-18 19:10:06	KE	sw	web	28	male	secondary	lower_mid	1800.000000	True	4	3	3	3	4	True
1	85ec8b90-5468-4880-8309-e325da14d877	55381	2025-01-24 12:21:13	TZ	sw	ios	23	female	bachelor	high	431.412783	True	3	2	3	4	4	False
2	498dff10-040f-4206-8170-dfce0d5a69f0	48338	2025-07-15 22:21:54	TZ	en	ios	49	male	bachelor	low	1800.000000	True	2	3	3	1	3	False
3	ddf11d94-5d6e-4322-9811-4e763f5ed46b	59925	2025-01-27 00:16:57	KE	en	web	22	male	bachelor	upper_mid	656.050991	True	4	4	1	3	5	False
4	2ef22a0c-fd13-4798-9276-f43831b8f7bc	68993	2024-08-19 04:21:49	KE	en	android	40	male	secondary	lower_mid	1553.938944	True	2	2	5	1	5	False