From fb4012be8c44a05515fba42b5a450554073fb00b Mon Sep 17 00:00:00 2001
From: Nik <nikhil.raut94@gmail.com>
Date: Thu, 23 Oct 2025 12:22:35 +0530
Subject: [PATCH 1/3] Initial draft for AI Domain Name Generator, UI to be
 improved

---
 .../ai_domain_finder/ai_domain_finder.ipynb   | 310 ++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100644 week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb

diff --git a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
new file mode 100644
index 0000000..fdf25e9
--- /dev/null
+++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1633a440",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\nWeek 2 Assignment: LLM Engineering\\nAuthor: Nikhil Raut\\n\\nNotebook: ai_domain_finder.ipynb\\n\\nPurpose:\\nBuild an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \\nthen returns: \\n    a list of available .coms, \\n    one preferred pick, \\n    and a brief audio rationale.\\n'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Week 2 Assignment: LLM Engineering\n",
+    "Author: Nikhil Raut\n",
+    "\n",
+    "Notebook: ai_domain_finder.ipynb\n",
+    "\n",
+    "Purpose:\n",
+    "Build an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \n",
+    "then returns: \n",
+    "    a list of available .coms, \n",
+    "    one preferred pick, \n",
+    "    and a brief audio rationale.\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "da528fbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "import requests\n",
+    "from typing import Dict, List, Tuple\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "import gradio as gr\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "OPENAI_MODEL = \"gpt-5-nano-2025-08-07\"\n",
+    "TTS_MODEL = \"gpt-4o-mini-tts\"\n",
+    "\n",
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "519674b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RDAP_URL = \"https://rdap.verisign.com/com/v1/domain/{}\"\n",
+    "\n",
+    "def _to_com(domain: str) -> str:\n",
+    "    d = domain.strip().lower()\n",
+    "    if d.endswith(\".com\"):\n",
+    "        return d\n",
+    "    return f\"{d}.com\"\n",
+    "\n",
+    "def check_com_availability(domain: str) -> Dict:\n",
+    "    \"\"\"\n",
+    "    Returns: {\"domain\": \"name.com\", \"available\": bool, \"status\": int}\n",
+    "    Rule: HTTP 200 => already registered (NOT available); 404 => available.\n",
+    "    \"\"\"\n",
+    "    fqdn = _to_com(domain)\n",
+    "    try:\n",
+    "        r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n",
+    "        available = (r.status_code == 404)\n",
+    "        return {\"domain\": fqdn, \"available\": available, \"status\": r.status_code}\n",
+    "    except requests.RequestException:\n",
+    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cd20c262",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_tool = {\n",
+    "    \"type\": \"function\",\n",
+    "    \"function\": {\n",
+    "        \"name\": \"check_com_availability\",\n",
+    "        \"description\": \"Check if a .com domain is available using RDAP. Accepts root or full domain.\",\n",
+    "        \"parameters\": {\n",
+    "            \"type\": \"object\",\n",
+    "            \"properties\": {\n",
+    "                \"domain\": {\n",
+    "                    \"type\": \"string\",\n",
+    "                    \"description\": \"Domain root or FQDN to check (limited to .com).\"\n",
+    "                }\n",
+    "            },\n",
+    "            \"required\": [\"domain\"],\n",
+    "            \"additionalProperties\": False\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "TOOLS = [check_tool]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2a9138b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def handle_tool_calls(message) -> List[Dict]:\n",
+    "    \"\"\"\n",
+    "    Translates model tool_calls into tool results for follow-up completion.\n",
+    "    \"\"\"\n",
+    "    results = []\n",
+    "    for call in (message.tool_calls or []):\n",
+    "        if call.function.name == \"check_com_availability\":\n",
+    "            args = json.loads(call.function.arguments or \"{}\")\n",
+    "            payload = check_com_availability(args.get(\"domain\", \"\"))\n",
+    "            results.append({\n",
+    "                \"role\": \"tool\",\n",
+    "                \"tool_call_id\": call.id,\n",
+    "                \"content\": json.dumps(payload)\n",
+    "            })\n",
+    "    return results\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b80c860",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SYSTEM_PROMPT = \"\"\"You are the Agent for project \"AI Domain Finder\".\n",
+    "Goal: suggest .com domains and verify availability using the tool ONLY (no guessing).\n",
+    "\n",
+    "Instructions:\n",
+    "- Always propose 5-12 brandable .com candidates based on:\n",
+    "  (1) Industry, (2) Target Customers, (3) Description.\n",
+    "- For each candidate, CALL the tool check_com_availability.\n",
+    "- Respond ONLY after checking all candidates.\n",
+    "- Output Markdown with three sections and these exact headings:\n",
+    "  1) Available .com domains:\n",
+    "     - itemized list (root + .com)\n",
+    "  2) Preferred domain:\n",
+    "     - a single best pick\n",
+    "  3) Audio explanation:\n",
+    "     - 1-2 concise sentences explaining the preference\n",
+    "\n",
+    "Constraints:\n",
+    "- Use customer-familiar words where helpful.\n",
+    "- Keep names short, simple, pronounceable; avoid hyphens/numbers unless meaningful.\n",
+    "- Never include TLDs other than .com.\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b45c6382",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run_agent_with_tools(history: List[Dict]) -> str:\n",
+    "    \"\"\"\n",
+    "    history: list of {\"role\": \"...\", \"content\": \"...\"} messages\n",
+    "    returns assistant markdown string (includes sections required by SYSTEM_PROMPT)\n",
+    "    \"\"\"\n",
+    "    messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + history\n",
+    "    resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
+    "\n",
+    "    while resp.choices[0].finish_reason == \"tool_calls\":\n",
+    "        tool_msg = resp.choices[0].message\n",
+    "        tool_results = handle_tool_calls(tool_msg)\n",
+    "        messages.append(tool_msg)\n",
+    "        messages.extend(tool_results)\n",
+    "        resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
+    "\n",
+    "    return resp.choices[0].message.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92306515",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_audio_text(markdown_reply: str) -> str:\n",
+    "    \"\"\"\n",
+    "    Pulls the 'Audio explanation:' section; falls back to first sentence.\n",
+    "    \"\"\"\n",
+    "    marker = \"Audio explanation:\"\n",
+    "    lower = markdown_reply.lower()\n",
+    "    idx = lower.find(marker.lower())\n",
+    "    if idx != -1:\n",
+    "        segment = markdown_reply[idx + len(marker):].strip()\n",
+    "        parts = segment.split(\".\")\n",
+    "        return (\". \".join([p.strip() for p in parts if p.strip()][:2]) + \".\").strip()\n",
+    "    return \"This domain is the clearest, most memorable fit for the audience and brand goals.\"\n",
+    "\n",
+    "def synth_audio(text: str) -> bytes:\n",
+    "    audio = openai.audio.speech.create(\n",
+    "        model=TTS_MODEL,\n",
+    "        voice=\"alloy\",\n",
+    "        input=text\n",
+    "    )\n",
+    "    return audio.content\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "7bdf7c67",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat(message: str, history_ui: List[Dict]) -> Tuple[List[Dict], bytes]:\n",
+    "    \"\"\"\n",
+    "    Gradio ChatInterface callback.\n",
+    "    - message: latest user text (free-form)\n",
+    "    - history_ui: [{\"role\": \"user\"/\"assistant\", \"content\": \"...\"}]\n",
+    "    Returns: updated history, audio bytes for the 'Audio explanation'.\n",
+    "    \"\"\"\n",
+    "    # Convert Gradio UI history to OpenAI-format history\n",
+    "    history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history_ui]\n",
+    "    history.append({\"role\": \"user\", \"content\": message})\n",
+    "\n",
+    "    reply_md = run_agent_with_tools(history)\n",
+    "    history.append({\"role\": \"assistant\", \"content\": reply_md})\n",
+    "\n",
+    "    audio_text = extract_audio_text(reply_md)\n",
+    "    audio_bytes = synth_audio(audio_text)\n",
+    "\n",
+    "    return history, audio_bytes\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc6c0650",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "INTRO = (\n",
+    "    \"Please provide details as text (three lines or paragraphs):\\n\"\n",
+    "    \"Industry: ...\\n\"\n",
+    "    \"Target Customers: ...\\n\"\n",
+    "    \"Description: ...\\n\\n\"\n",
+    "    \"You can refine in follow-ups (e.g., tone, shorter names, avoid words, etc.).\"\n",
+    ")\n",
+    "\n",
+    "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n",
+    "    gr.Markdown(\"# AI Domain Finder (.com only)\")\n",
+    "    gr.Markdown(\"Provide your business details. The Agent will suggest .com options, verify availability, pick a preferred domain, and speak a short rationale.\")\n",
+    "    with gr.Row():\n",
+    "        chatbot = gr.Chatbot(type=\"messages\", height=460)\n",
+    "    with gr.Row():\n",
+    "        audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True)\n",
+    "    with gr.Row():\n",
+    "        msg = gr.Textbox(label=\"Your input\", placeholder=INTRO, lines=6)\n",
+    "\n",
+    "    def _append_user(m, hist):\n",
+    "        return \"\", hist + [{\"role\": \"user\", \"content\": m}]\n",
+    "\n",
+    "    msg.submit(_append_user, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(\n",
+    "        chat, inputs=[msg, chatbot], outputs=[chatbot, audio_out]\n",
+    "    )\n",
+    "\n",
+    "ui.launch(inbrowser=True, auth=None, show_error=True)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-engineering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From ca9c77d91c660f866311a9e215321f6648b09a8f Mon Sep 17 00:00:00 2001
From: Nik <nikhil.raut94@gmail.com>
Date: Thu, 23 Oct 2025 12:46:26 +0530
Subject: [PATCH 2/3] Improved UI layout, added bulk domain check and early
 stop to improve cost efficiency.

---
 .../ai_domain_finder/ai_domain_finder.ipynb   | 274 ++++++++++++++----
 1 file changed, 218 insertions(+), 56 deletions(-)

diff --git a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
index fdf25e9..06bcf65 100644
--- a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
+++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
@@ -2,21 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "1633a440",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'\\nWeek 2 Assignment: LLM Engineering\\nAuthor: Nikhil Raut\\n\\nNotebook: ai_domain_finder.ipynb\\n\\nPurpose:\\nBuild an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \\nthen returns: \\n    a list of available .coms, \\n    one preferred pick, \\n    and a brief audio rationale.\\n'"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "\"\"\"\n",
     "Week 2 Assignment: LLM Engineering\n",
@@ -35,7 +24,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "da528fbe",
    "metadata": {},
    "outputs": [],
@@ -44,6 +33,7 @@
     "import json\n",
     "import requests\n",
     "from typing import Dict, List, Tuple\n",
+    "import re\n",
     "\n",
     "from dotenv import load_dotenv\n",
     "from openai import OpenAI\n",
@@ -68,56 +58,76 @@
     "\n",
     "def _to_com(domain: str) -> str:\n",
     "    d = domain.strip().lower()\n",
-    "    if d.endswith(\".com\"):\n",
-    "        return d\n",
-    "    return f\"{d}.com\"\n",
+    "    return d if d.endswith(\".com\") else f\"{d}.com\"\n",
     "\n",
     "def check_com_availability(domain: str) -> Dict:\n",
-    "    \"\"\"\n",
-    "    Returns: {\"domain\": \"name.com\", \"available\": bool, \"status\": int}\n",
-    "    Rule: HTTP 200 => already registered (NOT available); 404 => available.\n",
-    "    \"\"\"\n",
     "    fqdn = _to_com(domain)\n",
     "    try:\n",
     "        r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n",
-    "        available = (r.status_code == 404)\n",
-    "        return {\"domain\": fqdn, \"available\": available, \"status\": r.status_code}\n",
+    "        return {\"domain\": fqdn, \"available\": (r.status_code == 404), \"status\": r.status_code}\n",
     "    except requests.RequestException:\n",
-    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n"
+    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n",
+    "\n",
+    "def check_com_availability_bulk(domains: List[str]) -> Dict:\n",
+    "    \"\"\"\n",
+    "    Input: list of domain roots or FQDNs.\n",
+    "    Returns:\n",
+    "      {\n",
+    "        \"results\": [{\"domain\": \"...\", \"available\": bool, \"status\": int}, ...],\n",
+    "        \"available\": [\"...\"],                 # convenience\n",
+    "        \"count_available\": int\n",
+    "      }\n",
+    "    \"\"\"\n",
+    "    session = requests.Session()\n",
+    "    results: List[Dict] = []\n",
+    "    for d in domains:\n",
+    "        fqdn = _to_com(d)\n",
+    "        try:\n",
+    "            r = session.get(RDAP_URL.format(fqdn), timeout=6)\n",
+    "            ok = (r.status_code == 404)\n",
+    "            results.append({\"domain\": fqdn, \"available\": ok, \"status\": r.status_code})\n",
+    "        except requests.RequestException:\n",
+    "            results.append({\"domain\": fqdn, \"available\": False, \"status\": 0})\n",
+    "\n",
+    "    available = [x[\"domain\"] for x in results if x[\"available\"]]\n",
+    "    return {\"results\": results, \"available\": available, \"count_available\": len(available)}\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "cd20c262",
    "metadata": {},
    "outputs": [],
    "source": [
-    "check_tool = {\n",
+    "check_tool_bulk = {\n",
     "    \"type\": \"function\",\n",
     "    \"function\": {\n",
-    "        \"name\": \"check_com_availability\",\n",
-    "        \"description\": \"Check if a .com domain is available using RDAP. Accepts root or full domain.\",\n",
+    "        \"name\": \"check_com_availability_bulk\",\n",
+    "        \"description\": \"Batch check .com availability via RDAP for a list of domains (roots or FQDNs).\",\n",
     "        \"parameters\": {\n",
     "            \"type\": \"object\",\n",
     "            \"properties\": {\n",
-    "                \"domain\": {\n",
-    "                    \"type\": \"string\",\n",
-    "                    \"description\": \"Domain root or FQDN to check (limited to .com).\"\n",
+    "                \"domains\": {\n",
+    "                    \"type\": \"array\",\n",
+    "                    \"items\": {\"type\": \"string\"},\n",
+    "                    \"minItems\": 1,\n",
+    "                    \"maxItems\": 50,\n",
+    "                    \"description\": \"List of domain roots or .com FQDNs.\"\n",
     "                }\n",
     "            },\n",
-    "            \"required\": [\"domain\"],\n",
+    "            \"required\": [\"domains\"],\n",
     "            \"additionalProperties\": False\n",
     "        }\n",
     "    }\n",
     "}\n",
     "\n",
-    "TOOLS = [check_tool]\n"
+    "TOOLS = [check_tool_bulk]\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "2a9138b6",
    "metadata": {},
    "outputs": [],
@@ -225,7 +235,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "7bdf7c67",
    "metadata": {},
    "outputs": [],
@@ -257,32 +267,184 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "INTRO = (\n",
-    "    \"Please provide details as text (three lines or paragraphs):\\n\"\n",
-    "    \"Industry: ...\\n\"\n",
-    "    \"Target Customers: ...\\n\"\n",
-    "    \"Description: ...\\n\\n\"\n",
-    "    \"You can refine in follow-ups (e.g., tone, shorter names, avoid words, etc.).\"\n",
-    ")\n",
     "\n",
-    "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n",
-    "    gr.Markdown(\"# AI Domain Finder (.com only)\")\n",
-    "    gr.Markdown(\"Provide your business details. The Agent will suggest .com options, verify availability, pick a preferred domain, and speak a short rationale.\")\n",
-    "    with gr.Row():\n",
-    "        chatbot = gr.Chatbot(type=\"messages\", height=460)\n",
-    "    with gr.Row():\n",
-    "        audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True)\n",
-    "    with gr.Row():\n",
-    "        msg = gr.Textbox(label=\"Your input\", placeholder=INTRO, lines=6)\n",
+    "_DOMAIN_RE = re.compile(r\"\\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\\.com\\b\", re.I)\n",
     "\n",
-    "    def _append_user(m, hist):\n",
-    "        return \"\", hist + [{\"role\": \"user\", \"content\": m}]\n",
+    "def _norm_domain(s: str) -> str:\n",
+    "    s = s.strip().lower()\n",
+    "    return s if s.endswith(\".com\") else f\"{s}.com\"\n",
     "\n",
-    "    msg.submit(_append_user, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(\n",
-    "        chat, inputs=[msg, chatbot], outputs=[chatbot, audio_out]\n",
+    "def parse_available(md: str) -> list[str]:\n",
+    "    lines = md.splitlines()\n",
+    "    out = []\n",
+    "    in_section = False\n",
+    "    for ln in lines:\n",
+    "        if ln.strip().lower().startswith(\"1) available .com domains\"):\n",
+    "            in_section = True\n",
+    "            continue\n",
+    "        if in_section and ln.strip().lower().startswith(\"2) preferred\"):\n",
+    "            break\n",
+    "        if in_section:\n",
+    "            if ln.strip().startswith((\"-\", \"*\")) or _DOMAIN_RE.search(ln):\n",
+    "                for m in _DOMAIN_RE.findall(ln):\n",
+    "                    out.append(_norm_domain(m))\n",
+    "    # dedupe while preserving order\n",
+    "    seen, uniq = set(), []\n",
+    "    for d in out:\n",
+    "        if d not in seen:\n",
+    "            seen.add(d)\n",
+    "            uniq.append(d)\n",
+    "    return uniq\n",
+    "\n",
+    "def parse_preferred(md: str) -> str:\n",
+    "    # look in the preferred section; fallback to first domain anywhere\n",
+    "    lower = md.lower()\n",
+    "    idx = lower.find(\"2) preferred domain\")\n",
+    "    if idx != -1:\n",
+    "        seg = md[idx: idx + 500]\n",
+    "        m = _DOMAIN_RE.search(seg)\n",
+    "        if m:\n",
+    "            return _norm_domain(m.group(0))\n",
+    "    m = _DOMAIN_RE.search(md)\n",
+    "    return _norm_domain(m.group(0)) if m else \"\"\n",
+    "\n",
+    "def merge_and_sort(old: list[str], new: list[str]) -> list[str]:\n",
+    "    merged = {d.lower() for d in old} | {d.lower() for d in new}\n",
+    "    return sorted(merged, key=lambda s: (len(s), s))\n",
+    "\n",
+    "def fmt_available_md(domains: list[str]) -> str:\n",
+    "    if not domains:\n",
+    "        return \"### Available .com domains (cumulative)\\n\\n*– none yet –*\"\n",
+    "    items = \"\\n\".join(f\"- `{d}`\" for d in domains)\n",
+    "    return f\"### Available .com domains (cumulative)\\n\\n{items}\"\n",
+    "\n",
+    "def fmt_preferred_md(d: str) -> str:\n",
+    "    if not d:\n",
+    "        return \"### Preferred domain\\n\\n*– not chosen yet –*\"\n",
+    "    return f\"### Preferred domain\\n\\n`{d}`\"\n",
+    "\n",
+    "def build_initial_message(industry: str, customers: str, desc: str) -> str:\n",
+    "    return (\n",
+    "        \"Please propose .com domains based on:\\n\"\n",
+    "        f\"Industry: {industry}\\n\"\n",
+    "        f\"Target Customers: {customers}\\n\"\n",
+    "        f\"Description: {desc}\"\n",
+    "    )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07f079d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def initial_submit(industry: str, customers: str, desc: str,\n",
+    "                   history: list[dict], known_avail: list[str], preferred_now: str):\n",
+    "    msg = build_initial_message(industry, customers, desc)\n",
+    "    history = (history or []) + [{\"role\": \"user\", \"content\": msg}]\n",
+    "\n",
+    "    reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n",
+    "    history += [{\"role\": \"assistant\", \"content\": reply_md}]\n",
+    "\n",
+    "    all_avail = merge_and_sort(known_avail or [], new_avail)\n",
+    "    preferred_final = preferred or preferred_now or \"\"\n",
+    "    audio_bytes = synth_audio(audio_text)\n",
+    "\n",
+    "    return (\n",
+    "        history,                         # s_history\n",
+    "        all_avail,                       # s_available\n",
+    "        preferred_final,                 # s_preferred\n",
+    "        gr.update(value=fmt_preferred_md(preferred_final)),   # preferred_md\n",
+    "        gr.update(value=fmt_available_md(all_avail)),         # available_md\n",
+    "        gr.update(value=\"\", visible=True),                    # reply_in -> now visible\n",
+    "        gr.update(value=audio_bytes, visible=True),           # audio_out\n",
     "    )\n",
     "\n",
-    "ui.launch(inbrowser=True, auth=None, show_error=True)\n"
+    "def refine_submit(reply: str,\n",
+    "                  history: list[dict], known_avail: list[str], preferred_now: str):\n",
+    "    if not reply.strip():\n",
+    "        return (\"\", history, known_avail, preferred_now,\n",
+    "                gr.update(), gr.update(), gr.update())\n",
+    "\n",
+    "    history = (history or []) + [{\"role\": \"user\", \"content\": reply.strip()}]\n",
+    "    reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n",
+    "    history += [{\"role\": \"assistant\", \"content\": reply_md}]\n",
+    "\n",
+    "    all_avail = merge_and_sort(known_avail or [], new_avail)\n",
+    "    preferred_final = preferred or preferred_now or \"\"\n",
+    "    audio_bytes = synth_audio(audio_text)\n",
+    "\n",
+    "    return (\n",
+    "        \"\",                                # clear Reply box\n",
+    "        history,                           # s_history\n",
+    "        all_avail,                         # s_available\n",
+    "        preferred_final,                   # s_preferred\n",
+    "        gr.update(value=fmt_preferred_md(preferred_final)),  # preferred_md\n",
+    "        gr.update(value=fmt_available_md(all_avail)),        # available_md\n",
+    "        gr.update(value=audio_bytes, visible=True),          # audio_out\n",
+    "    )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d52ebc02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n",
+    "    gr.Markdown(\"# AI Domain Finder (.com only)\")\n",
+    "    gr.Markdown(\"Agent proposes .com domains, verifies via RDAP, picks a preferred choice, and explains briefly.\")\n",
+    "\n",
+    "    # App state\n",
+    "    s_history = gr.State([])\n",
+    "    s_available = gr.State([])\n",
+    "    s_preferred = gr.State(\"\")\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        with gr.Column(scale=7):  # LEFT 70%\n",
+    "            with gr.Group():\n",
+    "                industry_in = gr.Textbox(label=\"Industry\")\n",
+    "                customers_in = gr.Textbox(label=\"Target Customers\")\n",
+    "                desc_in = gr.Textbox(label=\"Description\", lines=3)\n",
+    "                find_btn = gr.Button(\"Find Domains\", variant=\"primary\")\n",
+    "\n",
+    "            audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True, visible=False)\n",
+    "\n",
+    "            reply_in = gr.Textbox(\n",
+    "                label=\"Reply\",\n",
+    "                placeholder=\"Chat with agent to refine the outputs\",\n",
+    "                lines=2,\n",
+    "                visible=False,  # 👈 hidden for the first input\n",
+    "            )\n",
+    "\n",
+    "        with gr.Column(scale=3):  # RIGHT 30%\n",
+    "            preferred_md = gr.Markdown(fmt_preferred_md(\"\"))\n",
+    "            available_md = gr.Markdown(fmt_available_md([]))\n",
+    "\n",
+    "    # Events\n",
+    "    find_btn.click(\n",
+    "        initial_submit,\n",
+    "        inputs=[industry_in, customers_in, desc_in, s_history, s_available, s_preferred],\n",
+    "        outputs=[\n",
+    "            s_history, s_available, s_preferred,\n",
+    "            preferred_md, available_md,\n",
+    "            reply_in,           # 👈 becomes visible after first run\n",
+    "            audio_out           # 👈 becomes visible after first run\n",
+    "        ],\n",
+    "    )\n",
+    "\n",
+    "    reply_in.submit(\n",
+    "        refine_submit,\n",
+    "        inputs=[reply_in, s_history, s_available, s_preferred],\n",
+    "        outputs=[\n",
+    "            reply_in, s_history, s_available, s_preferred,\n",
+    "            preferred_md, available_md, audio_out\n",
+    "        ],\n",
+    "    )\n",
+    "\n",
+    "ui.launch(inbrowser=True, show_error=True)\n"
    ]
   }
  ],

From ec0296b26189ec5eb57e80612230244db622c8cf Mon Sep 17 00:00:00 2001
From: Nik <nikhil.raut94@gmail.com>
Date: Thu, 23 Oct 2025 13:48:16 +0530
Subject: [PATCH 3/3] Improved prompting to avoid spaces or symbols in domain
 names.

---
 .../ai_domain_finder/ai_domain_finder.ipynb   | 469 ++++++++++++++----
 1 file changed, 359 insertions(+), 110 deletions(-)

diff --git a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
index 06bcf65..c0fbbcc 100644
--- a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
+++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
@@ -32,7 +32,7 @@
     "import os\n",
     "import json\n",
     "import requests\n",
-    "from typing import Dict, List, Tuple\n",
+    "from typing import Dict, List, Tuple, Any, Optional\n",
     "import re\n",
     "\n",
     "from dotenv import load_dotenv\n",
@@ -47,6 +47,73 @@
     "openai = OpenAI()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "361f7fe3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# --- robust logging that works inside VS Code notebooks + Gradio threads ---\n",
+    "import sys, logging, threading\n",
+    "from collections import deque\n",
+    "from typing import Any\n",
+    "\n",
+    "DEBUG_LLM = True          # toggle on/off noisy logs\n",
+    "CLEAR_LOG_ON_RUN = True   # clear panel before each submit\n",
+    "\n",
+    "_LOG_BUFFER = deque(maxlen=2000)   # keep ~2000 lines in memory\n",
+    "_LOG_LOCK = threading.Lock()\n",
+    "\n",
+    "class GradioBufferHandler(logging.Handler):\n",
+    "    def emit(self, record: logging.LogRecord) -> None:\n",
+    "        try:\n",
+    "            msg = self.format(record)\n",
+    "        except Exception:\n",
+    "            msg = record.getMessage()\n",
+    "        with _LOG_LOCK:\n",
+    "            for line in (msg.splitlines() or [\"\"]):\n",
+    "                _LOG_BUFFER.append(line)\n",
+    "\n",
+    "def get_log_text() -> str:\n",
+    "    with _LOG_LOCK:\n",
+    "        return \"\\n\".join(_LOG_BUFFER)\n",
+    "\n",
+    "def clear_log_buffer() -> None:\n",
+    "    with _LOG_LOCK:\n",
+    "        _LOG_BUFFER.clear()\n",
+    "\n",
+    "def _setup_logger() -> logging.Logger:\n",
+    "    logger = logging.getLogger(\"aidf\")\n",
+    "    logger.setLevel(logging.DEBUG if DEBUG_LLM else logging.INFO)\n",
+    "    logger.handlers.clear()\n",
+    "    fmt = logging.Formatter(\"%(asctime)s | %(levelname)s | %(message)s\", \"%H:%M:%S\")\n",
+    "\n",
+    "    stream = logging.StreamHandler(stream=sys.stdout)  # captured by VS Code notebook\n",
+    "    stream.setFormatter(fmt)\n",
+    "\n",
+    "    buf = GradioBufferHandler()                        # shown inside the Gradio panel\n",
+    "    buf.setFormatter(fmt)\n",
+    "\n",
+    "    logger.addHandler(stream)\n",
+    "    logger.addHandler(buf)\n",
+    "    logger.propagate = False\n",
+    "    return logger\n",
+    "\n",
+    "logger = _setup_logger()\n",
+    "\n",
+    "def dbg_json(obj: Any, title: str = \"\") -> None:\n",
+    "    \"\"\"Convenience: pretty-print JSON-ish objects to the logger.\"\"\"\n",
+    "    try:\n",
+    "        txt = json.dumps(obj, ensure_ascii=False, indent=2)\n",
+    "    except Exception:\n",
+    "        txt = str(obj)\n",
+    "    if title:\n",
+    "        logger.debug(\"%s\\n%s\", title, txt)\n",
+    "    else:\n",
+    "        logger.debug(\"%s\", txt)\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -56,12 +123,35 @@
    "source": [
     "RDAP_URL = \"https://rdap.verisign.com/com/v1/domain/{}\"\n",
     "\n",
+    "_ALPHA_RE = re.compile(r\"^[a-z]+$\", re.IGNORECASE)\n",
+    "\n",
     "def _to_com(domain: str) -> str:\n",
     "    d = domain.strip().lower()\n",
     "    return d if d.endswith(\".com\") else f\"{d}.com\"\n",
     "\n",
+    "def _sld_is_english_alpha(fqdn: str) -> bool:\n",
+    "    \"\"\"\n",
+    "    True only if the second-level label (just before .com) is made up\n",
+    "    exclusively of English letters (a-z).\n",
+    "    Examples:\n",
+    "      foo.com      -> True\n",
+    "      foo-bar.com  -> False\n",
+    "      foo1.com     -> False\n",
+    "      café.com     -> False\n",
+    "      xn--cafe.com -> False\n",
+    "      www.foo.com  -> True (checks 'foo')\n",
+    "    \"\"\"\n",
+    "    if not fqdn.endswith(\".com\"):\n",
+    "        return False\n",
+    "    sld = fqdn[:-4].split(\".\")[-1]  # take label immediately before .com\n",
+    "    return bool(sld) and bool(_ALPHA_RE.fullmatch(sld))\n",
+    "\n",
     "def check_com_availability(domain: str) -> Dict:\n",
     "    fqdn = _to_com(domain)\n",
+    "    # Skip API if not strictly English letters\n",
+    "    if not _sld_is_english_alpha(fqdn):\n",
+    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n",
+    "\n",
     "    try:\n",
     "        r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n",
     "        return {\"domain\": fqdn, \"available\": (r.status_code == 404), \"status\": r.status_code}\n",
@@ -80,8 +170,15 @@
     "    \"\"\"\n",
     "    session = requests.Session()\n",
     "    results: List[Dict] = []\n",
+    "\n",
     "    for d in domains:\n",
     "        fqdn = _to_com(d)\n",
+    "\n",
+    "        # Skip API if not strictly English letters\n",
+    "        if not _sld_is_english_alpha(fqdn):\n",
+    "            results.append({\"domain\": fqdn, \"available\": False, \"status\": 0})\n",
+    "            continue\n",
+    "\n",
     "        try:\n",
     "            r = session.get(RDAP_URL.format(fqdn), timeout=6)\n",
     "            ok = (r.status_code == 404)\n",
@@ -133,19 +230,31 @@
    "outputs": [],
    "source": [
     "def handle_tool_calls(message) -> List[Dict]:\n",
-    "    \"\"\"\n",
-    "    Translates model tool_calls into tool results for follow-up completion.\n",
-    "    \"\"\"\n",
     "    results = []\n",
     "    for call in (message.tool_calls or []):\n",
-    "        if call.function.name == \"check_com_availability\":\n",
-    "            args = json.loads(call.function.arguments or \"{}\")\n",
+    "        fn = getattr(call.function, \"name\", None)\n",
+    "        args_raw = getattr(call.function, \"arguments\", \"\") or \"{}\"\n",
+    "        try:\n",
+    "            args = json.loads(args_raw)\n",
+    "        except Exception:\n",
+    "            args = {}\n",
+    "\n",
+    "        logger.debug(\"TOOL CALL -> %s | args=%s\", fn, json.dumps(args, ensure_ascii=False))\n",
+    "\n",
+    "        if fn == \"check_com_availability_bulk\":\n",
+    "            payload = check_com_availability_bulk(args.get(\"domains\", []))\n",
+    "        elif fn == \"check_com_availability\":\n",
     "            payload = check_com_availability(args.get(\"domain\", \"\"))\n",
-    "            results.append({\n",
-    "                \"role\": \"tool\",\n",
-    "                \"tool_call_id\": call.id,\n",
-    "                \"content\": json.dumps(payload)\n",
-    "            })\n",
+    "        else:\n",
+    "            payload = {\"error\": f\"unknown tool {fn}\"}\n",
+    "\n",
+    "        logger.debug(\"TOOL RESULT <- %s | %s\", fn, json.dumps(payload, ensure_ascii=False))\n",
+    "\n",
+    "        results.append({\n",
+    "            \"role\": \"tool\",\n",
+    "            \"tool_call_id\": call.id,\n",
+    "            \"content\": json.dumps(payload),\n",
+    "        })\n",
     "    return results\n"
    ]
   },
@@ -159,26 +268,67 @@
     "SYSTEM_PROMPT = \"\"\"You are the Agent for project \"AI Domain Finder\".\n",
     "Goal: suggest .com domains and verify availability using the tool ONLY (no guessing).\n",
     "\n",
-    "Instructions:\n",
-    "- Always propose 5-12 brandable .com candidates based on:\n",
+    "Do this each interaction:\n",
+    "- Generate up to ~20 short, brandable .com candidates from:\n",
     "  (1) Industry, (2) Target Customers, (3) Description.\n",
-    "- For each candidate, CALL the tool check_com_availability.\n",
-    "- Respond ONLY after checking all candidates.\n",
-    "- Output Markdown with three sections and these exact headings:\n",
-    "  1) Available .com domains:\n",
-    "     - itemized list (root + .com)\n",
-    "  2) Preferred domain:\n",
-    "     - a single best pick\n",
-    "  3) Audio explanation:\n",
-    "     - 1-2 concise sentences explaining the preference\n",
+    "- Use the BULK tool `check_com_availability_bulk` with a list of candidates\n",
+    "  (roots or FQDNs). Prefer a single call or very few batched calls.\n",
+    "- If >= 5 available .coms are found, STOP checking and finalize the answer.\n",
+    "\n",
+    "Output Markdown with EXACT section headings:\n",
+    "1) Available .com domains:\n",
+    "   - itemized list of available .coms only (root + .com)\n",
+    "2) Preferred domain:\n",
+    "   - a single best pick\n",
+    "3) Audio explanation:\n",
+    "   - 1–2 concise sentences explaining the preference\n",
     "\n",
     "Constraints:\n",
     "- Use customer-familiar words where helpful.\n",
     "- Keep names short, simple, pronounceable; avoid hyphens/numbers unless meaningful.\n",
     "- Never include TLDs other than .com.\n",
+    "- domain is made up of english alphabets in lower case only no symbols or spaces to use\n",
     "\"\"\"\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72e9d8c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _asdict_tool_call(tc: Any) -> dict:\n",
+    "    try:\n",
+    "        return {\n",
+    "            \"id\": getattr(tc, \"id\", None),\n",
+    "            \"type\": \"function\",\n",
+    "            \"function\": {\n",
+    "                \"name\": getattr(tc.function, \"name\", None),\n",
+    "                \"arguments\": getattr(tc.function, \"arguments\", None),\n",
+    "            },\n",
+    "        }\n",
+    "    except Exception:\n",
+    "        return {\"type\": \"function\", \"function\": {\"name\": None, \"arguments\": None}}\n",
+    "\n",
+    "def _asdict_message(msg: Any) -> dict:\n",
+    "    if isinstance(msg, dict):\n",
+    "        return msg\n",
+    "    role = getattr(msg, \"role\", None)\n",
+    "    content = getattr(msg, \"content\", None)\n",
+    "    tool_calls = getattr(msg, \"tool_calls\", None)\n",
+    "    out = {\"role\": role, \"content\": content}\n",
+    "    if tool_calls:\n",
+    "        out[\"tool_calls\"] = [_asdict_tool_call(tc) for tc in tool_calls]\n",
+    "    return out\n",
+    "\n",
+    "def _sanitized_messages_for_log(messages: list[dict | Any]) -> list[dict]:\n",
+    "    return [_asdict_message(m) for m in messages]\n",
+    "\n",
+    "def _limit_text(s: str, limit: int = 40000) -> str:\n",
+    "    return s if len(s) <= limit else (s[:limit] + \"\\n... [truncated]\")\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -186,22 +336,58 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def run_agent_with_tools(history: List[Dict]) -> str:\n",
+    "def run_agent_with_tools(history: List[Dict]) -> Tuple[str, List[str], str]:\n",
     "    \"\"\"\n",
-    "    history: list of {\"role\": \"...\", \"content\": \"...\"} messages\n",
-    "    returns assistant markdown string (includes sections required by SYSTEM_PROMPT)\n",
+    "    Returns:\n",
+    "      reply_md: final assistant markdown\n",
+    "      tool_available: .coms taken from list-valued 'available' fields of tool results; boolean (single-domain) results are not collected (order-preserving, deduped)\n",
+    "      dbg_text: concatenated log buffer (for the UI panel)\n",
     "    \"\"\"\n",
-    "    messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + history\n",
+    "    messages: List[Dict] = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + history\n",
+    "    tool_available: List[str] = []\n",
+    "\n",
+    "    dbg_json(_sanitized_messages_for_log(messages), \"=== LLM REQUEST (initial messages) ===\")\n",
     "    resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
     "\n",
     "    while resp.choices[0].finish_reason == \"tool_calls\":\n",
-    "        tool_msg = resp.choices[0].message\n",
-    "        tool_results = handle_tool_calls(tool_msg)\n",
+    "        tool_msg_sdk = resp.choices[0].message\n",
+    "        tool_msg = _asdict_message(tool_msg_sdk)\n",
+    "        dbg_json(tool_msg, \"=== ASSISTANT (tool_calls) ===\")\n",
+    "\n",
+    "        tool_results = handle_tool_calls(tool_msg_sdk)\n",
+    "\n",
+    "        # Accumulate authoritative availability directly from tool outputs\n",
+    "        for tr in tool_results:\n",
+    "            try:\n",
+    "                data = json.loads(tr[\"content\"])\n",
+    "                if isinstance(data, dict) and isinstance(data.get(\"available\"), list):\n",
+    "                    for d in data[\"available\"]:\n",
+    "                        tool_available.append(_to_com(d))\n",
+    "            except Exception:\n",
+    "                pass\n",
+    "\n",
+    "        dbg_json([json.loads(tr[\"content\"]) for tr in tool_results], \"=== TOOL RESULTS ===\")\n",
+    "\n",
     "        messages.append(tool_msg)\n",
     "        messages.extend(tool_results)\n",
+    "        dbg_json(_sanitized_messages_for_log(messages), \"=== LLM REQUEST (messages + tools) ===\")\n",
+    "\n",
     "        resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
     "\n",
-    "    return resp.choices[0].message.content"
+    "    # Deduplicate while preserving first-seen order\n",
+    "    seen, uniq = set(), []\n",
+    "    for d in tool_available:\n",
+    "        if d not in seen:\n",
+    "            seen.add(d)\n",
+    "            uniq.append(d)\n",
+    "\n",
+    "    reply_md = resp.choices[0].message.content\n",
+    "    logger.debug(\"=== FINAL ASSISTANT ===\\n%s\", _limit_text(reply_md))\n",
+    "    dbg_json(uniq, \"=== AVAILABLE FROM TOOLS (authoritative) ===\")\n",
+    "\n",
+    "    # Return current buffer text for the UI panel\n",
+    "    dbg_text = _limit_text(get_log_text(), 40000)\n",
+    "    return reply_md, uniq, dbg_text\n"
    ]
   },
   {
@@ -233,33 +419,6 @@
     "    return audio.content\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7bdf7c67",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def chat(message: str, history_ui: List[Dict]) -> Tuple[List[Dict], bytes]:\n",
-    "    \"\"\"\n",
-    "    Gradio ChatInterface callback.\n",
-    "    - message: latest user text (free-form)\n",
-    "    - history_ui: [{\"role\": \"user\"/\"assistant\", \"content\": \"...\"}]\n",
-    "    Returns: updated history, audio bytes for the 'Audio explanation'.\n",
-    "    \"\"\"\n",
-    "    # Convert Gradio UI history to OpenAI-format history\n",
-    "    history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history_ui]\n",
-    "    history.append({\"role\": \"user\", \"content\": message})\n",
-    "\n",
-    "    reply_md = run_agent_with_tools(history)\n",
-    "    history.append({\"role\": \"assistant\", \"content\": reply_md})\n",
-    "\n",
-    "    audio_text = extract_audio_text(reply_md)\n",
-    "    audio_bytes = synth_audio(audio_text)\n",
-    "\n",
-    "    return history, audio_bytes\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -269,6 +428,8 @@
    "source": [
     "\n",
     "_DOMAIN_RE = re.compile(r\"\\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\\.com\\b\", re.I)\n",
+    "_HDR_AVAIL = re.compile(r\"^\\s*[\\d\\.\\)\\-]*\\s*available\\s+.*\\.com\\s+domains\", re.I)\n",
+    "_HDR_PREF  = re.compile(r\"^\\s*[\\d\\.\\)\\-]*\\s*preferred\\s+domain\", re.I)\n",
     "\n",
     "def _norm_domain(s: str) -> str:\n",
     "    s = s.strip().lower()\n",
@@ -279,16 +440,19 @@
     "    out = []\n",
     "    in_section = False\n",
     "    for ln in lines:\n",
-    "        if ln.strip().lower().startswith(\"1) available .com domains\"):\n",
+    "        if _HDR_AVAIL.search(ln):\n",
     "            in_section = True\n",
     "            continue\n",
-    "        if in_section and ln.strip().lower().startswith(\"2) preferred\"):\n",
+    "        if in_section and _HDR_PREF.search(ln):\n",
     "            break\n",
     "        if in_section:\n",
-    "            if ln.strip().startswith((\"-\", \"*\")) or _DOMAIN_RE.search(ln):\n",
-    "                for m in _DOMAIN_RE.findall(ln):\n",
-    "                    out.append(_norm_domain(m))\n",
-    "    # dedupe while preserving order\n",
+    "            for m in _DOMAIN_RE.findall(ln):\n",
+    "                out.append(_norm_domain(m))\n",
+    "    # Fallback: if nothing was parsed from the section (e.g. the header was not\n",
+    "    # found), collect every .com in the reply; the prompt asks for available ones only.\n",
+    "    if not out:\n",
+    "        out = [_norm_domain(m) for m in _DOMAIN_RE.findall(md)]\n",
+    "    # dedupe while preserving order\n",
     "    seen, uniq = set(), []\n",
     "    for d in out:\n",
     "        if d not in seen:\n",
@@ -297,14 +461,17 @@
     "    return uniq\n",
     "\n",
     "def parse_preferred(md: str) -> str:\n",
-    "    # look in the preferred section; fallback to first domain anywhere\n",
-    "    lower = md.lower()\n",
-    "    idx = lower.find(\"2) preferred domain\")\n",
-    "    if idx != -1:\n",
-    "        seg = md[idx: idx + 500]\n",
-    "        m = _DOMAIN_RE.search(seg)\n",
-    "        if m:\n",
-    "            return _norm_domain(m.group(0))\n",
+    "    # search the preferred section first\n",
+    "    lines = md.splitlines()\n",
+    "    start = None\n",
+    "    for i, ln in enumerate(lines):\n",
+    "        if _HDR_PREF.search(ln):\n",
+    "            start = i\n",
+    "            break\n",
+    "    segment = \"\\n\".join(lines[start:start+8]) if start is not None else md[:500]\n",
+    "    m = _DOMAIN_RE.search(segment)\n",
+    "    if m:\n",
+    "        return _norm_domain(m.group(0))\n",
     "    m = _DOMAIN_RE.search(md)\n",
     "    return _norm_domain(m.group(0)) if m else \"\"\n",
     "\n",
@@ -323,13 +490,22 @@
     "        return \"### Preferred domain\\n\\n*– not chosen yet –*\"\n",
     "    return f\"### Preferred domain\\n\\n`{d}`\"\n",
     "\n",
-    "def build_initial_message(industry: str, customers: str, desc: str) -> str:\n",
-    "    return (\n",
-    "        \"Please propose .com domains based on:\\n\"\n",
-    "        f\"Industry: {industry}\\n\"\n",
-    "        f\"Target Customers: {customers}\\n\"\n",
-    "        f\"Description: {desc}\"\n",
-    "    )\n"
+    "def build_context_msg(known_avail: Optional[List[str]], preferred_now: Optional[str]) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a short 'state so far' block that we prepend to the next user turn\n",
+    "    so the model always sees the preferred and cumulative available list.\n",
+    "    \"\"\"\n",
+    "    lines = []\n",
+    "    if (preferred_now or \"\").strip():\n",
+    "        lines.append(f\"Preferred domain so far: {preferred_now.strip().lower()}\")\n",
+    "    if known_avail:\n",
+    "        lines.append(\"Available .com domains discovered so far:\")\n",
+    "        for d in known_avail:\n",
+    "            if d:\n",
+    "                lines.append(f\"- {d.strip().lower()}\")\n",
+    "    if not lines:\n",
+    "        return \"\"\n",
+    "    return \"STATE TO CARRY OVER FROM PREVIOUS TURNS:\\n\" + \"\\n\".join(lines)"
    ]
   },
   {
@@ -338,52 +514,102 @@
    "id": "07f079d6",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "def run_and_extract(history: List[Dict]) -> Tuple[str, List[str], str, str, str]:\n",
+    "    reply_md, avail_from_tools, dbg_text = run_agent_with_tools(history)\n",
+    "    parsed_avail = parse_available(reply_md)\n",
+    "    new_avail = merge_and_sort(avail_from_tools, parsed_avail)\n",
+    "    preferred = parse_preferred(reply_md)\n",
+    "    audio_text = extract_audio_text(reply_md)\n",
+    "    return reply_md, new_avail, preferred, audio_text, dbg_text\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cd5d8ef",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "def initial_submit(industry: str, customers: str, desc: str,\n",
-    "                   history: list[dict], known_avail: list[str], preferred_now: str):\n",
-    "    msg = build_initial_message(industry, customers, desc)\n",
-    "    history = (history or []) + [{\"role\": \"user\", \"content\": msg}]\n",
+    "                   history: List[Dict], known_avail: List[str], preferred_now: str):\n",
+    "    if CLEAR_LOG_ON_RUN:\n",
+    "        clear_log_buffer()\n",
     "\n",
-    "    reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n",
+    "    logger.info(\"Initial submit | industry=%r | customers=%r | desc_len=%d\",\n",
+    "                industry, customers, len(desc or \"\"))\n",
+    "\n",
+    "    # Build the carried-over state block (empty when nothing is known yet, e.g. on the first run)\n",
+    "    ctx = build_context_msg(known_avail or [], preferred_now or \"\")\n",
+    "\n",
+    "    user_msg = (\n",
+    "        \"Please propose .com domains based on:\\n\"\n",
+    "        f\"Industry: {industry}\\n\"\n",
+    "        f\"Target Customers: {customers}\\n\"\n",
+    "        f\"Description: {desc}\"\n",
+    "    )\n",
+    "\n",
+    "    # Single user turn that includes state + prompt so the model always sees memory\n",
+    "    full_content = (ctx + \"\\n\\n\" if ctx else \"\") + user_msg\n",
+    "\n",
+    "    history = (history or []) + [{\"role\": \"user\", \"content\": full_content}]\n",
+    "    reply_md, new_avail, preferred, audio_text, dbg_text = run_and_extract(history)\n",
     "    history += [{\"role\": \"assistant\", \"content\": reply_md}]\n",
     "\n",
-    "    all_avail = merge_and_sort(known_avail or [], new_avail)\n",
+    "    all_avail = merge_and_sort(known_avail or [], new_avail or [])\n",
     "    preferred_final = preferred or preferred_now or \"\"\n",
     "    audio_bytes = synth_audio(audio_text)\n",
     "\n",
     "    return (\n",
     "        history,                         # s_history\n",
-    "        all_avail,                       # s_available\n",
+    "        all_avail,                       # s_available (cumulative)\n",
     "        preferred_final,                 # s_preferred\n",
-    "        gr.update(value=fmt_preferred_md(preferred_final)),   # preferred_md\n",
-    "        gr.update(value=fmt_available_md(all_avail)),         # available_md\n",
-    "        gr.update(value=\"\", visible=True),                    # reply_in -> now visible\n",
-    "        gr.update(value=audio_bytes, visible=True),           # audio_out\n",
+    "        gr.update(value=fmt_preferred_md(preferred_final)),\n",
+    "        gr.update(value=fmt_available_md(all_avail)),\n",
+    "        gr.update(value=\"\", visible=True),                 # reply_in: show after first run\n",
+    "        gr.update(value=audio_bytes, visible=True),        # audio_out\n",
+    "        gr.update(value=dbg_text),                         # debug_box\n",
+    "        gr.update(value=\"Find Domains (done)\", interactive=False),  # NEW: disable Find\n",
+    "        gr.update(visible=True),                           # NEW: show Send button\n",
     "    )\n",
     "\n",
     "def refine_submit(reply: str,\n",
-    "                  history: list[dict], known_avail: list[str], preferred_now: str):\n",
-    "    if not reply.strip():\n",
+    "                  history: List[Dict], known_avail: List[str], preferred_now: str):\n",
+    "    # If empty, do nothing (keeps UI state untouched)\n",
+    "    if not (reply or \"\").strip():\n",
     "        return (\"\", history, known_avail, preferred_now,\n",
-    "                gr.update(), gr.update(), gr.update())\n",
+    "                gr.update(), gr.update(), gr.update(), gr.update())\n",
     "\n",
-    "    history = (history or []) + [{\"role\": \"user\", \"content\": reply.strip()}]\n",
-    "    reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n",
+    "    if CLEAR_LOG_ON_RUN:\n",
+    "        clear_log_buffer()\n",
+    "    logger.info(\"Refine submit | user_reply_len=%d\", len(reply))\n",
+    "\n",
+    "    # Always prepend memory + the user's refinement so the model can iterate properly\n",
+    "    ctx = build_context_msg(known_avail or [], preferred_now or \"\")\n",
+    "    full_content = (ctx + \"\\n\\n\" if ctx else \"\") + reply.strip()\n",
+    "\n",
+    "    history = (history or []) + [{\"role\": \"user\", \"content\": full_content}]\n",
+    "    reply_md, new_avail, preferred, audio_text, dbg_text = run_and_extract(history)\n",
     "    history += [{\"role\": \"assistant\", \"content\": reply_md}]\n",
     "\n",
-    "    all_avail = merge_and_sort(known_avail or [], new_avail)\n",
+    "    all_avail = merge_and_sort(known_avail or [], new_avail or [])\n",
     "    preferred_final = preferred or preferred_now or \"\"\n",
     "    audio_bytes = synth_audio(audio_text)\n",
     "\n",
     "    return (\n",
-    "        \"\",                                # clear Reply box\n",
-    "        history,                           # s_history\n",
-    "        all_avail,                         # s_available\n",
-    "        preferred_final,                   # s_preferred\n",
-    "        gr.update(value=fmt_preferred_md(preferred_final)),  # preferred_md\n",
-    "        gr.update(value=fmt_available_md(all_avail)),        # available_md\n",
-    "        gr.update(value=audio_bytes, visible=True),          # audio_out\n",
-    "    )\n"
+    "        \"\",                                 # clear Reply box\n",
+    "        history,                            # s_history\n",
+    "        all_avail,                          # s_available (cumulative)\n",
+    "        preferred_final,                    # s_preferred\n",
+    "        gr.update(value=fmt_preferred_md(preferred_final)),\n",
+    "        gr.update(value=fmt_available_md(all_avail)),\n",
+    "        gr.update(value=audio_bytes, visible=True),\n",
+    "        gr.update(value=dbg_text),          # debug_box\n",
+    "    )\n",
+    "\n",
+    "def clear_debug():\n",
+    "    clear_log_buffer()\n",
+    "    return gr.update(value=\"\")\n"
    ]
   },
   {
@@ -412,38 +638,61 @@
     "\n",
     "            audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True, visible=False)\n",
     "\n",
-    "            reply_in = gr.Textbox(\n",
-    "                label=\"Reply\",\n",
-    "                placeholder=\"Chat with agent to refine the outputs\",\n",
-    "                lines=2,\n",
-    "                visible=False,  # 👈 hidden for the first input\n",
-    "            )\n",
+    "            with gr.Row():\n",
+    "                reply_in = gr.Textbox(\n",
+    "                    label=\"Reply\",\n",
+    "                    placeholder=\"Chat with the agent to refine the outputs\",\n",
+    "                    lines=2,\n",
+    "                    visible=False,  # hidden for the first input\n",
+    "                )\n",
+    "                send_btn = gr.Button(\"Send\", variant=\"primary\", visible=False)\n",
     "\n",
     "        with gr.Column(scale=3):  # RIGHT 30%\n",
     "            preferred_md = gr.Markdown(fmt_preferred_md(\"\"))\n",
     "            available_md = gr.Markdown(fmt_available_md([]))\n",
     "\n",
+    "            with gr.Accordion(\"Debug log\", open=False):\n",
+    "                debug_box = gr.Textbox(label=\"Log\", value=\"\", lines=16, interactive=False)\n",
+    "                clear_btn = gr.Button(\"Clear log\", size=\"sm\")\n",
+    "\n",
     "    # Events\n",
+    "    # Initial run: also disables Find and shows Send\n",
     "    find_btn.click(\n",
     "        initial_submit,\n",
     "        inputs=[industry_in, customers_in, desc_in, s_history, s_available, s_preferred],\n",
     "        outputs=[\n",
     "            s_history, s_available, s_preferred,\n",
     "            preferred_md, available_md,\n",
-    "            reply_in,           # 👈 becomes visible after first run\n",
-    "            audio_out           # 👈 becomes visible after first run\n",
+    "            reply_in,            # visible after first run\n",
+    "            audio_out,           # visible after first run\n",
+    "            debug_box,\n",
+    "            find_btn,            # NEW: disable + relabel\n",
+    "            send_btn,            # NEW: show the Send button\n",
     "        ],\n",
     "    )\n",
     "\n",
+    "    # Multi-turn submit via Enter in the textbox\n",
     "    reply_in.submit(\n",
     "        refine_submit,\n",
     "        inputs=[reply_in, s_history, s_available, s_preferred],\n",
     "        outputs=[\n",
     "            reply_in, s_history, s_available, s_preferred,\n",
-    "            preferred_md, available_md, audio_out\n",
+    "            preferred_md, available_md, audio_out, debug_box\n",
     "        ],\n",
     "    )\n",
     "\n",
+    "    # Multi-turn submit via explicit Send button\n",
+    "    send_btn.click(\n",
+    "        refine_submit,\n",
+    "        inputs=[reply_in, s_history, s_available, s_preferred],\n",
+    "        outputs=[\n",
+    "            reply_in, s_history, s_available, s_preferred,\n",
+    "            preferred_md, available_md, audio_out, debug_box\n",
+    "        ],\n",
+    "    )\n",
+    "\n",
+    "    clear_btn.click(clear_debug, inputs=[], outputs=[debug_box])\n",
+    "\n",
     "ui.launch(inbrowser=True, show_error=True)\n"
    ]
   }