Initial draft for AI Domain Name Generator, UI to be improved

2025-10-23 12:22:35 +05:30
parent 3a10b6ea95
commit fb4012be8c
1 changed files with 310 additions and 0 deletions
--- a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
+++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1633a440",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\nWeek 2 Assignment: LLM Engineering\\nAuthor: Nikhil Raut\\n\\nNotebook: ai_domain_finder.ipynb\\n\\nPurpose:\\nBuild an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \\nthen returns: \\n    a list of available .coms, \\n    one preferred pick, \\n    and a brief audio rationale.\\n'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Week 2 Assignment: LLM Engineering\n",
+    "Author: Nikhil Raut\n",
+    "\n",
+    "Notebook: ai_domain_finder.ipynb\n",
+    "\n",
+    "Purpose:\n",
+    "Build an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \n",
+    "then returns: \n",
+    "    a list of available .coms, \n",
+    "    one preferred pick, \n",
+    "    and a brief audio rationale.\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "da528fbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "import re\n",
+    "from typing import Dict, List, Tuple\n",
+    "\n",
+    "import gradio as gr\n",
+    "import requests\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "OPENAI_MODEL = \"gpt-5-nano-2025-08-07\"\n",
+    "TTS_MODEL = \"gpt-4o-mini-tts\"\n",
+    "\n",
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "519674b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RDAP_URL = \"https://rdap.verisign.com/com/v1/domain/{}\"\n",
+    "\n",
+    "def _to_com(domain: str) -> str:\n",
+    "    d = domain.strip().lower()\n",
+    "    if d.endswith(\".com\"):\n",
+    "        return d\n",
+    "    return f\"{d}.com\"\n",
+    "\n",
+    "def check_com_availability(domain: str) -> Dict:\n",
+    "    \"\"\"\n",
+    "    Check whether a .com domain is unregistered by querying the RDAP endpoint in RDAP_URL.\n",
+    "\n",
+    "    Args:\n",
+    "        domain: Domain root (\"acme\") or full name (\"acme.com\"); normalized with _to_com.\n",
+    "\n",
+    "    Returns: {\"domain\": \"name.com\", \"available\": bool, \"status\": int}\n",
+    "    Rule: HTTP 200 => already registered (NOT available); 404 => available.\n",
+    "    Any other HTTP status is reported as NOT available.\n",
+    "    status is 0 when there is no lookup result: the request failed, or the name\n",
+    "    was rejected locally because it is not a single well-formed .com label.\n",
+    "    \"\"\"\n",
+    "    fqdn = _to_com(domain)\n",
+    "    label = fqdn[:-len(\".com\")]\n",
+    "    # Only one well-formed label can be registered directly under .com. Without\n",
+    "    # this check, empty input (\".com\"), sub-domains, URLs or names containing\n",
+    "    # spaces would be sent to RDAP, and a 404 for such a malformed name would\n",
+    "    # be misreported as \"available\".\n",
+    "    is_valid_label = (\n",
+    "        0 < len(label) <= 63\n",
+    "        and label.isascii()\n",
+    "        and label.replace(\"-\", \"\").isalnum()\n",
+    "        and not label.startswith(\"-\")\n",
+    "        and not label.endswith(\"-\")\n",
+    "    )\n",
+    "    if not is_valid_label:\n",
+    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n",
+    "    try:\n",
+    "        r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n",
+    "    except requests.RequestException:\n",
+    "        # Network failure or timeout: availability is unknown, so stay conservative.\n",
+    "        return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n",
+    "    return {\"domain\": fqdn, \"available\": r.status_code == 404, \"status\": r.status_code}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cd20c262",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "check_tool = {\n",
+    "    \"type\": \"function\",\n",
+    "    \"function\": {\n",
+    "        \"name\": \"check_com_availability\",\n",
+    "        \"description\": \"Check if a .com domain is available using RDAP. Accepts root or full domain.\",\n",
+    "        \"parameters\": {\n",
+    "            \"type\": \"object\",\n",
+    "            \"properties\": {\n",
+    "                \"domain\": {\n",
+    "                    \"type\": \"string\",\n",
+    "                    \"description\": \"Domain root or FQDN to check (limited to .com).\"\n",
+    "                }\n",
+    "            },\n",
+    "            \"required\": [\"domain\"],\n",
+    "            \"additionalProperties\": False\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "TOOLS = [check_tool]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2a9138b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def handle_tool_calls(message) -> List[Dict]:\n",
+    "    \"\"\"\n",
+    "    Translates model tool_calls into tool results for follow-up completion.\n",
+    "    \"\"\"\n",
+    "    results = []\n",
+    "    for call in (message.tool_calls or []):\n",
+    "        if call.function.name == \"check_com_availability\":\n",
+    "            args = json.loads(call.function.arguments or \"{}\")\n",
+    "            payload = check_com_availability(args.get(\"domain\", \"\"))\n",
+    "            results.append({\n",
+    "                \"role\": \"tool\",\n",
+    "                \"tool_call_id\": call.id,\n",
+    "                \"content\": json.dumps(payload)\n",
+    "            })\n",
+    "    return results\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b80c860",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SYSTEM_PROMPT = \"\"\"You are the Agent for project \"AI Domain Finder\".\n",
+    "Goal: suggest .com domains and verify availability using the tool ONLY (no guessing).\n",
+    "\n",
+    "Instructions:\n",
+    "- Always propose 5-12 brandable .com candidates based on:\n",
+    "  (1) Industry, (2) Target Customers, (3) Description.\n",
+    "- For each candidate, CALL the tool check_com_availability.\n",
+    "- Respond ONLY after checking all candidates.\n",
+    "- Output Markdown with three sections and these exact headings:\n",
+    "  1) Available .com domains:\n",
+    "     - itemized list (root + .com)\n",
+    "  2) Preferred domain:\n",
+    "     - a single best pick\n",
+    "  3) Audio explanation:\n",
+    "     - 1-2 concise sentences explaining the preference\n",
+    "\n",
+    "Constraints:\n",
+    "- Use customer-familiar words where helpful.\n",
+    "- Keep names short, simple, pronounceable; avoid hyphens/numbers unless meaningful.\n",
+    "- Never include TLDs other than .com.\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b45c6382",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run_agent_with_tools(history: List[Dict], max_rounds: int = 20) -> str:\n",
+    "    \"\"\"\n",
+    "    Runs the chat model, executing the tool calls it requests until it gives a final answer.\n",
+    "\n",
+    "    history: list of {\"role\": \"...\", \"content\": \"...\"} messages\n",
+    "    max_rounds: upper bound on tool-calling rounds; once reached, one last\n",
+    "        completion is requested with tool use disabled so the loop always ends\n",
+    "    returns assistant markdown string (includes sections required by SYSTEM_PROMPT);\n",
+    "        an empty string if the model returned no text content\n",
+    "    \"\"\"\n",
+    "    messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + history\n",
+    "    resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
+    "\n",
+    "    rounds = 0\n",
+    "    while resp.choices[0].finish_reason == \"tool_calls\":\n",
+    "        tool_msg = resp.choices[0].message\n",
+    "        # The assistant message carrying the tool calls must precede their results.\n",
+    "        messages.append(tool_msg)\n",
+    "        messages.extend(handle_tool_calls(tool_msg))\n",
+    "        rounds += 1\n",
+    "        if rounds >= max_rounds:\n",
+    "            # Budget used up: forbid further tool calls so the model answers with\n",
+    "            # the results gathered so far instead of looping indefinitely.\n",
+    "            resp = openai.chat.completions.create(\n",
+    "                model=OPENAI_MODEL, messages=messages, tools=TOOLS, tool_choice=\"none\"\n",
+    "            )\n",
+    "            break\n",
+    "        resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n",
+    "\n",
+    "    # Normalise a missing (None) content to \"\" so callers always receive a str.\n",
+    "    return resp.choices[0].message.content or \"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92306515",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_audio_text(markdown_reply: str) -> str:\n",
+    "    \"\"\"\n",
+    "    Pulls the 'Audio explanation:' section; falls back to first sentence.\n",
+    "    \"\"\"\n",
+    "    marker = \"Audio explanation:\"\n",
+    "    lower = markdown_reply.lower()\n",
+    "    idx = lower.find(marker.lower())\n",
+    "    if idx != -1:\n",
+    "        segment = markdown_reply[idx + len(marker):].strip()\n",
+    "        parts = segment.split(\".\")\n",
+    "        return (\". \".join([p.strip() for p in parts if p.strip()][:2]) + \".\").strip()\n",
+    "    return \"This domain is the clearest, most memorable fit for the audience and brand goals.\"\n",
+    "\n",
+    "def synth_audio(text: str) -> bytes:\n",
+    "    \"\"\"Send `text` to the speech endpoint (TTS_MODEL, voice \"alloy\") and return the response content.\"\"\"\n",
+    "    speech_request = {\"model\": TTS_MODEL, \"voice\": \"alloy\", \"input\": text}\n",
+    "    return openai.audio.speech.create(**speech_request).content\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "7bdf7c67",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat(message: str, history_ui: List[Dict]) -> Tuple[List[Dict], bytes]:\n",
+    "    \"\"\"\n",
+    "    Gradio callback: runs the agent on the conversation and voices its rationale.\n",
+    "    - message: latest user text (free-form); may be empty when the user turn is\n",
+    "      already the last entry of history_ui\n",
+    "    - history_ui: [{\"role\": \"user\"/\"assistant\", \"content\": \"...\"}]\n",
+    "    Returns: updated history, audio bytes for the 'Audio explanation'.\n",
+    "    \"\"\"\n",
+    "    # Convert Gradio UI history to OpenAI-format history (drops any extra keys)\n",
+    "    history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history_ui]\n",
+    "    # The submit handler in this notebook clears the textbox and appends the user\n",
+    "    # turn to the chat history before this callback runs, so `message` can arrive\n",
+    "    # empty. Appending it anyway would add a blank user turn to the conversation.\n",
+    "    if message and message.strip():\n",
+    "        history.append({\"role\": \"user\", \"content\": message})\n",
+    "\n",
+    "    # Guard against a missing (None) reply so the history and TTS always get a string.\n",
+    "    reply_md = run_agent_with_tools(history) or \"\"\n",
+    "    history.append({\"role\": \"assistant\", \"content\": reply_md})\n",
+    "\n",
+    "    audio_text = extract_audio_text(reply_md)\n",
+    "    audio_bytes = synth_audio(audio_text)\n",
+    "\n",
+    "    return history, audio_bytes\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc6c0650",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Placeholder text shown in the empty input box.\n",
+    "INTRO = (\n",
+    "    \"Please provide details as text (three lines or paragraphs):\\n\"\n",
+    "    \"Industry: ...\\n\"\n",
+    "    \"Target Customers: ...\\n\"\n",
+    "    \"Description: ...\\n\\n\"\n",
+    "    \"You can refine in follow-ups (e.g., tone, shorter names, avoid words, etc.).\"\n",
+    ")\n",
+    "\n",
+    "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n",
+    "    gr.Markdown(\"# AI Domain Finder (.com only)\")\n",
+    "    gr.Markdown(\"Provide your business details. The Agent will suggest .com options, verify availability, pick a preferred domain, and speak a short rationale.\")\n",
+    "    with gr.Row():\n",
+    "        chatbot = gr.Chatbot(type=\"messages\", height=460)\n",
+    "    with gr.Row():\n",
+    "        audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True)\n",
+    "    with gr.Row():\n",
+    "        msg = gr.Textbox(label=\"Your input\", placeholder=INTRO, lines=6)\n",
+    "\n",
+    "    def _append_user(m, hist):\n",
+    "        \"\"\"Show the user's turn in the chat window right away and clear the textbox.\"\"\"\n",
+    "        return \"\", hist + [{\"role\": \"user\", \"content\": m}]\n",
+    "\n",
+    "    def _respond(hist):\n",
+    "        \"\"\"Run the agent for the pending user turn, i.e. the last entry of the chat history.\"\"\"\n",
+    "        # By the time this step runs, _append_user has cleared the textbox, so\n",
+    "        # reading `msg` again would hand chat() an empty message. Take the text\n",
+    "        # from the history instead and pass the earlier turns separately.\n",
+    "        if not hist or hist[-1][\"role\"] != \"user\":\n",
+    "            return hist, None\n",
+    "        return chat(hist[-1][\"content\"], hist[:-1])\n",
+    "\n",
+    "    msg.submit(_append_user, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(\n",
+    "        _respond, inputs=[chatbot], outputs=[chatbot, audio_out]\n",
+    "    )\n",
+    "\n",
+    "ui.launch(inbrowser=True, auth=None, show_error=True)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-engineering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}