From c94a3f13c358fe966f17f0a69d9cb27af15e9e27 Mon Sep 17 00:00:00 2001
From: Nik <nikhil.raut94@gmail.com>
Date: Tue, 21 Oct 2025 00:16:01 +0530
Subject: [PATCH] Add a domain name generator based on keyword ideas and
 target location using the OpenAI Chat Completions API

---
 .../domain_name_generator.ipynb               | 239 ++++++++++++++++++
 1 file changed, 239 insertions(+)
 create mode 100644 week1/community-contributions/domain_name_generator/domain_name_generator.ipynb

diff --git a/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb b/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb
new file mode 100644
index 0000000..029691d
--- /dev/null
+++ b/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb
@@ -0,0 +1,239 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1633a440",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Week 1 Assignment: LLM Engineering\n",
+    "Author: Nikhil Raut\n",
+    "\n",
+    "Notebook: domain_name_generator.ipynb\n",
+    "\n",
+    "Purpose:\n",
+    "Generate short, memorable domain root ideas (no TLD) from keywords using an OpenAI Chat Completions system+user prompt.\n",
+    "\n",
+    "Quick setup:\n",
+    "1) pip install openai python-dotenv ipython\n",
+    "2) Add OPENAI_API_KEY to a .env file in the project root\n",
+    "\n",
+    "How to use (Python script; first export the notebook, e.g. `jupyter nbconvert --to script domain_name_generator.ipynb`):\n",
+    "from domain_name_generator import generate_domain_ideas\n",
+    "ideas = generate_domain_ideas([\"fitness\", \"coach\", \"wellness\"], target_country=\"India\", n=20)\n",
+    "print(ideas)\n",
+    "\n",
+    "How to use (Notebook):\n",
+    "# after running the import, config, helper, and generator cells\n",
+    "generate_domain_ideas([\"fintech\", \"pay\"], target_country=\"US\", n=15)\n",
+    "\n",
+    "Notes:\n",
+    "- n: 1-50; returns a list[str] of at most n TLD-less roots (invalid or duplicate ideas are dropped)\n",
+    "- Adjust MODEL and temperature in the config cell or function args\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da528fbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "from dataclasses import dataclass, field\n",
+    "from typing import List, Dict, Tuple\n",
+    "from openai import OpenAI\n",
+    "from dotenv import load_dotenv\n",
+    "from IPython.display import Markdown, display\n",
+    "import json\n",
+    "import re\n",
+    "from typing import Optional"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "519674b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# --- Cell 2: Config & Client\n",
+    "\n",
+    "# Load environment (.env should contain OPENAI_API_KEY)\n",
+    "load_dotenv()\n",
+    "\n",
+    "# Initialize OpenAI client (relies on OPENAI_API_KEY)\n",
+    "openai = OpenAI()\n",
+    "\n",
+    "# Model constants (feel free to change to another chat model)\n",
+    "MODEL = \"gpt-4o-mini\"\n",
+    "\n",
+    "# Generation runs at a fairly creative temperature; lower it for more repeatable output. SCORING_TEMPERATURE is currently unused in this notebook.\n",
+    "GENERATION_TEMPERATURE = 0.8\n",
+    "SCORING_TEMPERATURE = 0.2\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cd20c262",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "SYSTEM_PROMPT = \"\"\"You are a helpful brand-naming assistant.\n",
+    "Generate **domain ROOT** ideas (no TLD like .com) that are:\n",
+    "- short (ideally 5–12 chars), pronounceable, and memorable\n",
+    "- alphanumeric only (no spaces or hyphens), start with a letter\n",
+    "- avoid famous trademarks and sensitive terms\n",
+    "- diverse styles: blends, portmanteau, slight misspellings, synonyms\n",
+    "Return ONLY valid JSON: {\"domains\": [\"idea1\", \"idea2\", ...]}\"\"\"\n",
+    "\n",
+    "def _build_user_prompt(keywords: list[str], target_country: Optional[str], n: int) -> str:\n",
+    "    kws = \", \".join(keywords)\n",
+    "    country_line = f\"Target country/market: {target_country}\" if target_country else \"Target country/market: (general/global)\"\n",
+    "    return (\n",
+    "        \"Given the keywords below, propose exactly \"\n",
+    "        f\"{n} short, brandable domain roots **without any TLD**.\\n\"\n",
+    "        f\"Keywords: {kws}\\n\"\n",
+    "        f\"{country_line}\\n\"\n",
+    "        \"Constraints:\\n\"\n",
+    "        \"- 1–2 syllables if possible\\n\"\n",
+    "        \"- No hyphens/underscores/spaces\\n\"\n",
+    "        \"- Avoid numbers unless they genuinely help memorability\\n\"\n",
+    "        \"Output format: a JSON object with a single key 'domains' whose value is an array of strings.\"\n",
+    "    )\n",
+    "\n",
+    "_valid_root = re.compile(r\"^[a-z][a-z0-9]{2,49}$\")  # 3–50 chars, starts with letter\n",
+    "\n",
+    "def _sanitize_root(s: str) -> str:\n",
+    "    s = s.strip().lower()\n",
+    "    # remove anything after a dot (accidental TLDs)\n",
+    "    s = s.split(\".\", 1)[0]\n",
+    "    # drop spaces and hyphens just in case\n",
+    "    s = s.replace(\" \", \"\").replace(\"-\", \"\")\n",
+    "    # keep only a–z0–9\n",
+    "    s = re.sub(r\"[^a-z0-9]\", \"\", s)\n",
+    "    # ensure starts with letter\n",
+    "    if s and not s[0].isalpha():\n",
+    "        s = re.sub(r\"^[^a-z]+\", \"\", s)\n",
+    "    return s\n",
+    "\n",
+    "def _unique_preserve_order(items: list[str]) -> list[str]:\n",
+    "    seen = set()\n",
+    "    out = []\n",
+    "    for it in items:\n",
+    "        if it not in seen:\n",
+    "            seen.add(it)\n",
+    "            out.append(it)\n",
+    "    return out"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a9138b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# --- Cell 4: Core generator function (Chat Completions)\n",
+    "\n",
+    "def generate_domain_ideas(\n",
+    "    keywords: list[str],\n",
+    "    target_country: Optional[str] = None,\n",
+    "    n: int = 20,\n",
+    "    *,\n",
+    "    model: str = MODEL,\n",
+    "    temperature: float = GENERATION_TEMPERATURE,\n",
+    ") -> list[str]:\n",
+    "    \"\"\"\n",
+    "    Generate up to `n` domain ROOT ideas (no TLD).\n",
+    "    - keywords: list of seed terms\n",
+    "    - target_country: optional market hint (e.g., 'India', 'US', 'DE')\n",
+    "    - n: maximum number of ideas to return (1–50)\n",
+    "    \"\"\"\n",
+    "    if not keywords or not any(k.strip() for k in keywords):\n",
+    "        raise ValueError(\"Provide at least one non-empty keyword.\")\n",
+    "    if not (1 <= int(n) <= 50):\n",
+    "        raise ValueError(\"`n` must be between 1 and 50.\")\n",
+    "\n",
+    "    messages = [\n",
+    "        {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
+    "        {\"role\": \"user\", \"content\": _build_user_prompt([k.strip() for k in keywords if k.strip()], target_country, int(n))},\n",
+    "    ]\n",
+    "\n",
+    "    resp = openai.chat.completions.create(\n",
+    "        model=model,\n",
+    "        messages=messages,\n",
+    "        temperature=temperature,\n",
+    "        response_format={\"type\": \"json_object\"},  # JSON mode: guarantees valid JSON, not the 'domains' schema\n",
+    "    )\n",
+    "\n",
+    "    content = resp.choices[0].message.content\n",
+    "\n",
+    "    # Try to parse JSON; if it fails, fall back to naive extraction.\n",
+    "    ideas: list[str] = []\n",
+    "    try:\n",
+    "        data = json.loads(content)\n",
+    "        if isinstance(data, dict) and isinstance(data.get(\"domains\"), list):\n",
+    "            ideas = [str(x) for x in data[\"domains\"]]\n",
+    "    except Exception:\n",
+    "        # Fallback: split lines / commas\n",
+    "        raw = re.split(r\"[\\n,]+\", content)\n",
+    "        ideas = [r for r in raw if r.strip()]\n",
+    "\n",
+    "    # Sanitize, validate, dedupe, and enforce count\n",
+    "    ideas = [_sanitize_root(x) for x in ideas]\n",
+    "    ideas = [x for x in ideas if _valid_root.match(x)]\n",
+    "    ideas = _unique_preserve_order(ideas)[: int(n)]\n",
+    "\n",
+    "    return ideas\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b80c860",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "example_keywords = [\"law\", \"gpt\", \"chatbot\", \"lawyer helper\"]\n",
+    "ideas = generate_domain_ideas(example_keywords, target_country=\"India\", n=20)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b45c6382",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display(Markdown(\"## Domain ideas (no TLD)\\n\" + \"\\n\".join(f\"{i+1}. `{d}`\" for i, d in enumerate(ideas))))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-engineering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}