Add a domain name generator driven by keyword ideas and a target location, using the OpenAI Chat Completions API

2025-10-21 00:16:01 +05:30
parent ebce76585b
commit c94a3f13c3
1 changed files with 239 additions and 0 deletions
--- a/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb
+++ b/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb
@@ -0,0 +1,239 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1633a440",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Week 1 Assignment: LLM Engineering\n",
    "Author: Nikhil Raut\n",
    "\n",
    "Notebook: domain_name_generator.ipynb\n",
    "\n",
    "Purpose:\n",
    "Generate short, memorable domain root ideas (no TLD) from keywords using an OpenAI Chat Completions system+user prompt.\n",
    "\n",
    "Quick setup:\n",
    "1) pip install openai python-dotenv ipython\n",
    "2) Add OPENAI_API_KEY to a .env file in the project root\n",
    "\n",
    "How to use (Python script, after exporting this notebook to domain_name_generator.py):\n",
    "from domain_name_generator import generate_domain_ideas\n",
    "ideas = generate_domain_ideas([\"fitness\", \"coach\", \"wellness\"], target_country=\"India\", n=20)\n",
    "print(ideas)\n",
    "\n",
    "How to use (Notebook):\n",
    "# after running config/client cells\n",
    "generate_domain_ideas([\"fintech\", \"pay\"], target_country=\"US\", n=15)\n",
    "\n",
    "Notes:\n",
    "- n: 1-50; returns a list[str] of at most n TLD-less roots (invalid or duplicate ideas are dropped)\n",
    "- Adjust MODEL and temperature in the config cell or function args\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da528fbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from dataclasses import dataclass, field\n",
    "from typing import List, Dict, Tuple\n",
    "from openai import OpenAI\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display\n",
    "import json\n",
    "import re\n",
    "from typing import Optional"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "519674b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Cell 2: Config & Client\n",
    "\n",
    "# Load environment (.env should contain OPENAI_API_KEY)\n",
    "load_dotenv()\n",
    "\n",
    "# Initialize OpenAI client (relies on OPENAI_API_KEY)\n",
    "openai = OpenAI()\n",
    "\n",
    "# Model constants (feel free to change to another chat model)\n",
    "MODEL = \"gpt-4o-mini\"\n",
    "\n",
    "# Generation temperature is fairly high for variety; scoring temperature is low for consistency. Raise GENERATION_TEMPERATURE for wilder ideas.\n",
    "GENERATION_TEMPERATURE = 0.8\n",
    "SCORING_TEMPERATURE = 0.2\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd20c262",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "SYSTEM_PROMPT = \"\"\"You are a helpful brand-naming assistant.\n",
    "Generate **domain ROOT** ideas (no TLD like .com) that are:\n",
    "- short (ideally 5–12 chars), pronounceable, and memorable\n",
    "- alphanumeric only (no spaces or hyphens), start with a letter\n",
    "- avoid famous trademarks and sensitive terms\n",
    "- diverse styles: blends, portmanteau, slight misspellings, synonyms\n",
    "Return ONLY valid JSON: {\"domains\": [\"idea1\", \"idea2\", ...]}\"\"\"\n",
    "\n",
    "def _build_user_prompt(keywords: list[str], target_country: Optional[str], n: int) -> str:\n",
    "    kws = \", \".join(keywords)\n",
    "    country_line = f\"Target country/market: {target_country}\" if target_country else \"Target country/market: (general/global)\"\n",
    "    return (\n",
    "        \"Given the keywords below, propose exactly \"\n",
    "        f\"{n} short, brandable domain roots **without any TLD**.\\n\"\n",
    "        f\"Keywords: {kws}\\n\"\n",
    "        f\"{country_line}\\n\"\n",
    "        \"Constraints:\\n\"\n",
    "        \"- 1–2 syllables if possible\\n\"\n",
    "        \"- No hyphens/underscores/spaces\\n\"\n",
    "        \"- Avoid numbers unless they genuinely help memorability\\n\"\n",
    "        \"Output format: a JSON object with a single key 'domains' whose value is an array of strings.\"\n",
    "    )\n",
    "\n",
    "_valid_root = re.compile(r\"^[a-z][a-z0-9]{2,49}$\")  # 3–50 chars, starts with letter\n",
    "\n",
    "def _sanitize_root(s: str) -> str:\n",
    "    s = s.strip().lower()\n",
    "    # remove anything after a dot (accidental TLDs)\n",
    "    s = s.split(\".\", 1)[0]\n",
    "    # drop spaces and hyphens just in case\n",
    "    s = s.replace(\" \", \"\").replace(\"-\", \"\")\n",
    "    # keep only a–z0–9\n",
    "    s = re.sub(r\"[^a-z0-9]\", \"\", s)\n",
    "    # ensure starts with letter\n",
    "    if s and not s[0].isalpha():\n",
    "        s = re.sub(r\"^[^a-z]+\", \"\", s)\n",
    "    return s\n",
    "\n",
    "def _unique_preserve_order(items: list[str]) -> list[str]:\n",
    "    seen = set()\n",
    "    out = []\n",
    "    for it in items:\n",
    "        if it not in seen:\n",
    "            seen.add(it)\n",
    "            out.append(it)\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a9138b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Cell 4: Core generator function (Chat Completions)\n",
    "\n",
    "def generate_domain_ideas(\n",
    "    keywords: list[str],\n",
    "    target_country: Optional[str] = None,\n",
    "    n: int = 20,\n",
    "    *,\n",
    "    model: str = MODEL,\n",
    "    temperature: float = GENERATION_TEMPERATURE,\n",
    ") -> list[str]:\n",
    "    \"\"\"\n",
    "    Generate up to `n` domain ROOT ideas (no TLD).\n",
    "    - keywords: list of seed terms\n",
    "    - target_country: optional market hint (e.g., 'India', 'US', 'DE')\n",
    "    - n: maximum number of ideas to return (1–50)\n",
    "    \"\"\"\n",
    "    if not keywords or not any(k.strip() for k in keywords):\n",
    "        raise ValueError(\"Provide at least one non-empty keyword.\")\n",
    "    if not (1 <= int(n) <= 50):\n",
    "        raise ValueError(\"`n` must be between 1 and 50.\")\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
    "        {\"role\": \"user\", \"content\": _build_user_prompt([k.strip() for k in keywords if k.strip()], target_country, int(n))},\n",
    "    ]\n",
    "\n",
    "    resp = openai.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=messages,\n",
    "        temperature=temperature,\n",
    "        response_format={\"type\": \"json_object\"},  # ask for strict JSON\n",
    "    )\n",
    "\n",
    "    content = resp.choices[0].message.content\n",
    "\n",
    "    # Try to parse JSON; if it fails, fall back to naive extraction.\n",
    "    ideas: list[str] = []\n",
    "    try:\n",
    "        data = json.loads(content)\n",
    "        if isinstance(data, dict) and isinstance(data.get(\"domains\"), list):\n",
    "            ideas = [str(x) for x in data[\"domains\"]]\n",
    "    except Exception:\n",
    "        # Fallback: split lines / commas\n",
    "        raw = re.split(r\"[\\n,]+\", content)\n",
    "        ideas = [r for r in raw if r.strip()]\n",
    "\n",
    "    # Sanitize, validate, dedupe, and enforce count\n",
    "    ideas = [_sanitize_root(x) for x in ideas]\n",
    "    ideas = [x for x in ideas if _valid_root.match(x)]\n",
    "    ideas = _unique_preserve_order(ideas)[: int(n)]\n",
    "\n",
    "    return ideas\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b80c860",
   "metadata": {},
   "outputs": [],
   "source": [
    "example_keywords = [\"law\", \"gpt\", \"chatbot\", \"lawyer helper\"]\n",
    "ideas = generate_domain_ideas(example_keywords, target_country=\"India\", n=20)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b45c6382",
   "metadata": {},
   "outputs": [],
   "source": [
    "display(Markdown(\"## Domain ideas (no TLD)\\n\" + \"\\n\".join(f\"{i+1}. `{d}`\" for i, d in enumerate(ideas))))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm-engineering",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }