diff --git a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb index fdf25e9..06bcf65 100644 --- a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb +++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb @@ -2,21 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "1633a440", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nWeek 2 Assignment: LLM Engineering\\nAuthor: Nikhil Raut\\n\\nNotebook: ai_domain_finder.ipynb\\n\\nPurpose:\\nBuild an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \\nthen returns: \\n a list of available .coms, \\n one preferred pick, \\n and a brief audio rationale.\\n'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\"\"\"\n", "Week 2 Assignment: LLM Engineering\n", @@ -35,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "da528fbe", "metadata": {}, "outputs": [], @@ -44,6 +33,7 @@ "import json\n", "import requests\n", "from typing import Dict, List, Tuple\n", + "import re\n", "\n", "from dotenv import load_dotenv\n", "from openai import OpenAI\n", @@ -68,56 +58,76 @@ "\n", "def _to_com(domain: str) -> str:\n", " d = domain.strip().lower()\n", - " if d.endswith(\".com\"):\n", - " return d\n", - " return f\"{d}.com\"\n", + " return d if d.endswith(\".com\") else f\"{d}.com\"\n", "\n", "def check_com_availability(domain: str) -> Dict:\n", - " \"\"\"\n", - " Returns: {\"domain\": \"name.com\", \"available\": bool, \"status\": int}\n", - " Rule: HTTP 200 => already registered (NOT available); 404 => available.\n", - " \"\"\"\n", " fqdn = _to_com(domain)\n", " try:\n", " r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n", - " available = (r.status_code == 404)\n", - " return {\"domain\": fqdn, \"available\": available, \"status\": r.status_code}\n", + " return {\"domain\": fqdn, \"available\": (r.status_code == 404), \"status\": r.status_code}\n", " except requests.RequestException:\n", - " return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n" + " return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n", + "\n", + "def check_com_availability_bulk(domains: List[str]) -> Dict:\n", + " \"\"\"\n", + " Input: list of domain roots or FQDNs.\n", + " Returns:\n", + " {\n", + " \"results\": [{\"domain\": \"...\", \"available\": bool, \"status\": int}, ...],\n", + " \"available\": [\"...\"], # convenience\n", + " \"count_available\": int\n", + " }\n", + " \"\"\"\n", + " session = requests.Session()\n", + " results: List[Dict] = []\n", + " for d in domains:\n", + " fqdn = _to_com(d)\n", + " try:\n", + " r = session.get(RDAP_URL.format(fqdn), timeout=6)\n", + " ok = (r.status_code == 404)\n", + " results.append({\"domain\": fqdn, \"available\": ok, \"status\": r.status_code})\n", + " except requests.RequestException:\n", + " results.append({\"domain\": fqdn, \"available\": False, \"status\": 0})\n", + "\n", + " available = [x[\"domain\"] for x in results if x[\"available\"]]\n", + " return {\"results\": results, \"available\": available, \"count_available\": len(available)}\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "cd20c262", "metadata": {}, "outputs": [], "source": [ - "check_tool = {\n", + "check_tool_bulk = {\n", " \"type\": \"function\",\n", " \"function\": {\n", - " \"name\": \"check_com_availability\",\n", - " \"description\": \"Check if a .com domain is available using RDAP. Accepts root or full domain.\",\n", + " \"name\": \"check_com_availability_bulk\",\n", + " \"description\": \"Batch check .com availability via RDAP for a list of domains (roots or FQDNs).\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", - " \"domain\": {\n", - " \"type\": \"string\",\n", - " \"description\": \"Domain root or FQDN to check (limited to .com).\"\n", + " \"domains\": {\n", + " \"type\": \"array\",\n", + " \"items\": {\"type\": \"string\"},\n", + " \"minItems\": 1,\n", + " \"maxItems\": 50,\n", + " \"description\": \"List of domain roots or .com FQDNs.\"\n", " }\n", " },\n", - " \"required\": [\"domain\"],\n", + " \"required\": [\"domains\"],\n", " \"additionalProperties\": False\n", " }\n", " }\n", "}\n", "\n", - "TOOLS = [check_tool]\n" + "TOOLS = [check_tool_bulk]\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "2a9138b6", "metadata": {}, "outputs": [], @@ -225,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "7bdf7c67", "metadata": {}, "outputs": [], @@ -257,32 +267,184 @@ "metadata": {}, "outputs": [], "source": [ - "INTRO = (\n", - " \"Please provide details as text (three lines or paragraphs):\\n\"\n", - " \"Industry: ...\\n\"\n", - " \"Target Customers: ...\\n\"\n", - " \"Description: ...\\n\\n\"\n", - " \"You can refine in follow-ups (e.g., tone, shorter names, avoid words, etc.).\"\n", - ")\n", "\n", - "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n", - " gr.Markdown(\"# AI Domain Finder (.com only)\")\n", - " gr.Markdown(\"Provide your business details. The Agent will suggest .com options, verify availability, pick a preferred domain, and speak a short rationale.\")\n", - " with gr.Row():\n", - " chatbot = gr.Chatbot(type=\"messages\", height=460)\n", - " with gr.Row():\n", - " audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True)\n", - " with gr.Row():\n", - " msg = gr.Textbox(label=\"Your input\", placeholder=INTRO, lines=6)\n", + "_DOMAIN_RE = re.compile(r\"\\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\\.com\\b\", re.I)\n", "\n", - " def _append_user(m, hist):\n", - " return \"\", hist + [{\"role\": \"user\", \"content\": m}]\n", + "def _norm_domain(s: str) -> str:\n", + " s = s.strip().lower()\n", + " return s if s.endswith(\".com\") else f\"{s}.com\"\n", "\n", - " msg.submit(_append_user, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(\n", - " chat, inputs=[msg, chatbot], outputs=[chatbot, audio_out]\n", + "def parse_available(md: str) -> list[str]:\n", + " lines = md.splitlines()\n", + " out = []\n", + " in_section = False\n", + " for ln in lines:\n", + " if ln.strip().lower().startswith(\"1) available .com domains\"):\n", + " in_section = True\n", + " continue\n", + " if in_section and ln.strip().lower().startswith(\"2) preferred\"):\n", + " break\n", + " if in_section:\n", + " if ln.strip().startswith((\"-\", \"*\")) or _DOMAIN_RE.search(ln):\n", + " for m in _DOMAIN_RE.findall(ln):\n", + " out.append(_norm_domain(m))\n", + " # dedupe while preserving order\n", + " seen, uniq = set(), []\n", + " for d in out:\n", + " if d not in seen:\n", + " seen.add(d)\n", + " uniq.append(d)\n", + " return uniq\n", + "\n", + "def parse_preferred(md: str) -> str:\n", + " # look in the preferred section; fallback to first domain anywhere\n", + " lower = md.lower()\n", + " idx = lower.find(\"2) preferred domain\")\n", + " if idx != -1:\n", + " seg = md[idx: idx + 500]\n", + " m = _DOMAIN_RE.search(seg)\n", + " if m:\n", + " return _norm_domain(m.group(0))\n", + " m = _DOMAIN_RE.search(md)\n", + " return _norm_domain(m.group(0)) if m else \"\"\n", + "\n", + "def merge_and_sort(old: list[str], new: list[str]) -> list[str]:\n", + " merged = {d.lower() for d in old} | {d.lower() for d in new}\n", + " return sorted(merged, key=lambda s: (len(s), s))\n", + "\n", + "def fmt_available_md(domains: list[str]) -> str:\n", + " if not domains:\n", + " return \"### Available .com domains (cumulative)\\n\\n*– none yet –*\"\n", + " items = \"\\n\".join(f\"- `{d}`\" for d in domains)\n", + " return f\"### Available .com domains (cumulative)\\n\\n{items}\"\n", + "\n", + "def fmt_preferred_md(d: str) -> str:\n", + " if not d:\n", + " return \"### Preferred domain\\n\\n*– not chosen yet –*\"\n", + " return f\"### Preferred domain\\n\\n`{d}`\"\n", + "\n", + "def build_initial_message(industry: str, customers: str, desc: str) -> str:\n", + " return (\n", + " \"Please propose .com domains based on:\\n\"\n", + " f\"Industry: {industry}\\n\"\n", + " f\"Target Customers: {customers}\\n\"\n", + " f\"Description: {desc}\"\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07f079d6", + "metadata": {}, + "outputs": [], + "source": [ + "def initial_submit(industry: str, customers: str, desc: str,\n", + " history: list[dict], known_avail: list[str], preferred_now: str):\n", + " msg = build_initial_message(industry, customers, desc)\n", + " history = (history or []) + [{\"role\": \"user\", \"content\": msg}]\n", + "\n", + " reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n", + " history += [{\"role\": \"assistant\", \"content\": reply_md}]\n", + "\n", + " all_avail = merge_and_sort(known_avail or [], new_avail)\n", + " preferred_final = preferred or preferred_now or \"\"\n", + " audio_bytes = synth_audio(audio_text)\n", + "\n", + " return (\n", + " history, # s_history\n", + " all_avail, # s_available\n", + " preferred_final, # s_preferred\n", + " gr.update(value=fmt_preferred_md(preferred_final)), # preferred_md\n", + " gr.update(value=fmt_available_md(all_avail)), # available_md\n", + " gr.update(value=\"\", visible=True), # reply_in -> now visible\n", + " gr.update(value=audio_bytes, visible=True), # audio_out\n", " )\n", "\n", - "ui.launch(inbrowser=True, auth=None, show_error=True)\n" + "def refine_submit(reply: str,\n", + " history: list[dict], known_avail: list[str], preferred_now: str):\n", + " if not reply.strip():\n", + " return (\"\", history, known_avail, preferred_now,\n", + " gr.update(), gr.update(), gr.update())\n", + "\n", + " history = (history or []) + [{\"role\": \"user\", \"content\": reply.strip()}]\n", + " reply_md, new_avail, preferred, audio_text = run_and_extract(history)\n", + " history += [{\"role\": \"assistant\", \"content\": reply_md}]\n", + "\n", + " all_avail = merge_and_sort(known_avail or [], new_avail)\n", + " preferred_final = preferred or preferred_now or \"\"\n", + " audio_bytes = synth_audio(audio_text)\n", + "\n", + " return (\n", + " \"\", # clear Reply box\n", + " history, # s_history\n", + " all_avail, # s_available\n", + " preferred_final, # s_preferred\n", + " gr.update(value=fmt_preferred_md(preferred_final)), # preferred_md\n", + " gr.update(value=fmt_available_md(all_avail)), # available_md\n", + " gr.update(value=audio_bytes, visible=True), # audio_out\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d52ebc02", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n", + " gr.Markdown(\"# AI Domain Finder (.com only)\")\n", + " gr.Markdown(\"Agent proposes .com domains, verifies via RDAP, picks a preferred choice, and explains briefly.\")\n", + "\n", + " # App state\n", + " s_history = gr.State([])\n", + " s_available = gr.State([])\n", + " s_preferred = gr.State(\"\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column(scale=7): # LEFT 70%\n", + " with gr.Group():\n", + " industry_in = gr.Textbox(label=\"Industry\")\n", + " customers_in = gr.Textbox(label=\"Target Customers\")\n", + " desc_in = gr.Textbox(label=\"Description\", lines=3)\n", + " find_btn = gr.Button(\"Find Domains\", variant=\"primary\")\n", + "\n", + " audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True, visible=False)\n", + "\n", + " reply_in = gr.Textbox(\n", + " label=\"Reply\",\n", + " placeholder=\"Chat with agent to refine the outputs\",\n", + " lines=2,\n", + " visible=False, # 👈 hidden for the first input\n", + " )\n", + "\n", + " with gr.Column(scale=3): # RIGHT 30%\n", + " preferred_md = gr.Markdown(fmt_preferred_md(\"\"))\n", + " available_md = gr.Markdown(fmt_available_md([]))\n", + "\n", + " # Events\n", + " find_btn.click(\n", + " initial_submit,\n", + " inputs=[industry_in, customers_in, desc_in, s_history, s_available, s_preferred],\n", + " outputs=[\n", + " s_history, s_available, s_preferred,\n", + " preferred_md, available_md,\n", + " reply_in, # 👈 becomes visible after first run\n", + " audio_out # 👈 becomes visible after first run\n", + " ],\n", + " )\n", + "\n", + " reply_in.submit(\n", + " refine_submit,\n", + " inputs=[reply_in, s_history, s_available, s_preferred],\n", + " outputs=[\n", + " reply_in, s_history, s_available, s_preferred,\n", + " preferred_md, available_md, audio_out\n", + " ],\n", + " )\n", + "\n", + "ui.launch(inbrowser=True, show_error=True)\n" ] } ],