diff --git a/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb new file mode 100644 index 0000000..fdf25e9 --- /dev/null +++ b/week2/community-contributions/ai_domain_finder/ai_domain_finder.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "1633a440", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nWeek 2 Assignment: LLM Engineering\\nAuthor: Nikhil Raut\\n\\nNotebook: ai_domain_finder.ipynb\\n\\nPurpose:\\nBuild an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \\nthen returns: \\n a list of available .coms, \\n one preferred pick, \\n and a brief audio rationale.\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Week 2 Assignment: LLM Engineering\n", + "Author: Nikhil Raut\n", + "\n", + "Notebook: ai_domain_finder.ipynb\n", + "\n", + "Purpose:\n", + "Build an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, \n", + "then returns: \n", + " a list of available .coms, \n", + " one preferred pick, \n", + " and a brief audio rationale.\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "da528fbe", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import requests\n", + "from typing import Dict, List, Tuple\n", + "\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "OPENAI_MODEL = \"gpt-5-nano-2025-08-07\"\n", + "TTS_MODEL = \"gpt-4o-mini-tts\"\n", + "\n", + "openai = OpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "519674b2", + "metadata": {}, + "outputs": [], + "source": [ + "RDAP_URL = \"https://rdap.verisign.com/com/v1/domain/{}\"\n", + "\n", + "def _to_com(domain: str) -> str:\n", + " d = domain.strip().lower()\n", + " if d.endswith(\".com\"):\n", + " return d\n", + " return f\"{d}.com\"\n", + "\n", + "def check_com_availability(domain: str) -> Dict:\n", + " \"\"\"\n", + " Returns: {\"domain\": \"name.com\", \"available\": bool, \"status\": int}\n", + " Rule: HTTP 200 => already registered (NOT available); 404 => available.\n", + " \"\"\"\n", + " fqdn = _to_com(domain)\n", + " try:\n", + " r = requests.get(RDAP_URL.format(fqdn), timeout=6)\n", + " available = (r.status_code == 404)\n", + " return {\"domain\": fqdn, \"available\": available, \"status\": r.status_code}\n", + " except requests.RequestException:\n", + " return {\"domain\": fqdn, \"available\": False, \"status\": 0}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cd20c262", + "metadata": {}, + "outputs": [], + "source": [ + "check_tool = {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"check_com_availability\",\n", + " \"description\": \"Check if a .com domain is available using RDAP. Accepts root or full domain.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"domain\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Domain root or FQDN to check (limited to .com).\"\n", + " }\n", + " },\n", + " \"required\": [\"domain\"],\n", + " \"additionalProperties\": False\n", + " }\n", + " }\n", + "}\n", + "\n", + "TOOLS = [check_tool]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2a9138b6", + "metadata": {}, + "outputs": [], + "source": [ + "def handle_tool_calls(message) -> List[Dict]:\n", + " \"\"\"\n", + " Translates model tool_calls into tool results for follow-up completion.\n", + " \"\"\"\n", + " results = []\n", + " for call in (message.tool_calls or []):\n", + " if call.function.name == \"check_com_availability\":\n", + " args = json.loads(call.function.arguments or \"{}\")\n", + " payload = check_com_availability(args.get(\"domain\", \"\"))\n", + " results.append({\n", + " \"role\": \"tool\",\n", + " \"tool_call_id\": call.id,\n", + " \"content\": json.dumps(payload)\n", + " })\n", + " return results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b80c860", + "metadata": {}, + "outputs": [], + "source": [ + "SYSTEM_PROMPT = \"\"\"You are the Agent for project \"AI Domain Finder\".\n", + "Goal: suggest .com domains and verify availability using the tool ONLY (no guessing).\n", + "\n", + "Instructions:\n", + "- Always propose 5-12 brandable .com candidates based on:\n", + " (1) Industry, (2) Target Customers, (3) Description.\n", + "- For each candidate, CALL the tool check_com_availability.\n", + "- Respond ONLY after checking all candidates.\n", + "- Output Markdown with three sections and these exact headings:\n", + " 1) Available .com domains:\n", + " - itemized list (root + .com)\n", + " 2) Preferred domain:\n", + " - a single best pick\n", + " 3) Audio explanation:\n", + " - 1-2 concise sentences explaining the preference\n", + "\n", + "Constraints:\n", + "- Use customer-familiar words where helpful.\n", + "- Keep names short, simple, pronounceable; avoid hyphens/numbers unless meaningful.\n", + "- Never include TLDs other than .com.\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b45c6382", + "metadata": {}, + "outputs": [], + "source": [ + "def run_agent_with_tools(history: List[Dict]) -> str:\n", + " \"\"\"\n", + " history: list of {\"role\": \"...\", \"content\": \"...\"} messages\n", + " returns assistant markdown string (includes sections required by SYSTEM_PROMPT)\n", + " \"\"\"\n", + " messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}] + history\n", + " resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n", + "\n", + " while resp.choices[0].finish_reason == \"tool_calls\":\n", + " tool_msg = resp.choices[0].message\n", + " tool_results = handle_tool_calls(tool_msg)\n", + " messages.append(tool_msg)\n", + " messages.extend(tool_results)\n", + " resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)\n", + "\n", + " return resp.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92306515", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_audio_text(markdown_reply: str) -> str:\n", + " \"\"\"\n", + " Pulls the 'Audio explanation:' section; falls back to first sentence.\n", + " \"\"\"\n", + " marker = \"Audio explanation:\"\n", + " lower = markdown_reply.lower()\n", + " idx = lower.find(marker.lower())\n", + " if idx != -1:\n", + " segment = markdown_reply[idx + len(marker):].strip()\n", + " parts = segment.split(\".\")\n", + " return (\". \".join([p.strip() for p in parts if p.strip()][:2]) + \".\").strip()\n", + " return \"This domain is the clearest, most memorable fit for the audience and brand goals.\"\n", + "\n", + "def synth_audio(text: str) -> bytes:\n", + " audio = openai.audio.speech.create(\n", + " model=TTS_MODEL,\n", + " voice=\"alloy\",\n", + " input=text\n", + " )\n", + " return audio.content\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7bdf7c67", + "metadata": {}, + "outputs": [], + "source": [ + "def chat(message: str, history_ui: List[Dict]) -> Tuple[List[Dict], bytes]:\n", + " \"\"\"\n", + " Gradio ChatInterface callback.\n", + " - message: latest user text (free-form)\n", + " - history_ui: [{\"role\": \"user\"/\"assistant\", \"content\": \"...\"}]\n", + " Returns: updated history, audio bytes for the 'Audio explanation'.\n", + " \"\"\"\n", + " # Convert Gradio UI history to OpenAI-format history\n", + " history = [{\"role\": h[\"role\"], \"content\": h[\"content\"]} for h in history_ui]\n", + " history.append({\"role\": \"user\", \"content\": message})\n", + "\n", + " reply_md = run_agent_with_tools(history)\n", + " history.append({\"role\": \"assistant\", \"content\": reply_md})\n", + "\n", + " audio_text = extract_audio_text(reply_md)\n", + " audio_bytes = synth_audio(audio_text)\n", + "\n", + " return history, audio_bytes\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc6c0650", + "metadata": {}, + "outputs": [], + "source": [ + "INTRO = (\n", + " \"Please provide details as text (three lines or paragraphs):\\n\"\n", + " \"Industry: ...\\n\"\n", + " \"Target Customers: ...\\n\"\n", + " \"Description: ...\\n\\n\"\n", + " \"You can refine in follow-ups (e.g., tone, shorter names, avoid words, etc.).\"\n", + ")\n", + "\n", + "with gr.Blocks(title=\"AI Domain Finder (.com only)\") as ui:\n", + " gr.Markdown(\"# AI Domain Finder (.com only)\")\n", + " gr.Markdown(\"Provide your business details. The Agent will suggest .com options, verify availability, pick a preferred domain, and speak a short rationale.\")\n", + " with gr.Row():\n", + " chatbot = gr.Chatbot(type=\"messages\", height=460)\n", + " with gr.Row():\n", + " audio_out = gr.Audio(label=\"Audio explanation\", autoplay=True)\n", + " with gr.Row():\n", + " msg = gr.Textbox(label=\"Your input\", placeholder=INTRO, lines=6)\n", + "\n", + " def _append_user(m, hist):\n", + " return \"\", hist + [{\"role\": \"user\", \"content\": m}]\n", + "\n", + " msg.submit(_append_user, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(\n", + " chat, inputs=[msg, chatbot], outputs=[chatbot, audio_out]\n", + " )\n", + "\n", + "ui.launch(inbrowser=True, auth=None, show_error=True)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm-engineering", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}