Adding Domain Name Generator based on keyword ideas and target location using openai chat completion api

This commit is contained in:
Nik
2025-10-21 00:16:01 +05:30
parent ebce76585b
commit c94a3f13c3

View File

@@ -0,0 +1,239 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1633a440",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Week 1 Assignment: LLM Engineering\n",
"Author: Nikhil Raut\n",
"\n",
"Notebook: domain_name_generator.ipynb\n",
"\n",
"Purpose:\n",
"Generate short, memorable domain root ideas (no TLD) from keywords using an OpenAI Chat Completions system+user prompt.\n",
"\n",
"Quick setup:\n",
"1) pip install openai python-dotenv ipython\n",
"2) Add OPENAI_API_KEY to a .env file in the project root\n",
"\n",
"How to use (Python script):\n",
"from domain_name_generator import generate_domain_ideas\n",
"ideas = generate_domain_ideas([\"fitness\", \"coach\", \"wellness\"], target_country=\"India\", n=20)\n",
"print(ideas)\n",
"\n",
"How to use (Notebook):\n",
"# after running config/client cells\n",
"generate_domain_ideas([\"fintech\", \"pay\"], target_country=\"US\", n=15)\n",
"\n",
"Notes:\n",
"- n: 1-50 (returns list[str] of TLD-less roots)\n",
"- Adjust MODEL and temperature in the config cell or function args\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da528fbe",
"metadata": {},
"outputs": [],
"source": [
"\n",
"from dataclasses import dataclass, field\n",
"from typing import List, Dict, Tuple\n",
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"import json\n",
"import re\n",
"from typing import Optional"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "519674b2",
"metadata": {},
"outputs": [],
"source": [
"# --- Cell 2: Config & Client\n",
"\n",
"# Load environment (.env should contain OPENAI_API_KEY)\n",
"load_dotenv()\n",
"\n",
"# Initialize OpenAI client (relies on OPENAI_API_KEY)\n",
"openai = OpenAI()\n",
"\n",
"# Model constants (feel free to change to another chat model)\n",
"MODEL = \"gpt-4o-mini\"\n",
"\n",
"# Deterministic-ish by default; raise temperature for wilder ideas.\n",
"GENERATION_TEMPERATURE = 0.8\n",
"SCORING_TEMPERATURE = 0.2\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd20c262",
"metadata": {},
"outputs": [],
"source": [
"\n",
"SYSTEM_PROMPT = \"\"\"You are a helpful brand-naming assistant.\n",
"Generate **domain ROOT** ideas (no TLD like .com) that are:\n",
"- short (ideally 512 chars), pronounceable, and memorable\n",
"- alphanumeric only (no spaces or hyphens), start with a letter\n",
"- avoid famous trademarks and sensitive terms\n",
"- diverse styles: blends, portmanteau, slight misspellings, synonyms\n",
"Return ONLY valid JSON: {\"domains\": [\"idea1\", \"idea2\", ...]}\"\"\"\n",
"\n",
"def _build_user_prompt(keywords: list[str], target_country: Optional[str], n: int) -> str:\n",
" kws = \", \".join(keywords)\n",
" country_line = f\"Target country/market: {target_country}\" if target_country else \"Target country/market: (general/global)\"\n",
" return (\n",
" \"Given the keywords below, propose exactly \"\n",
" f\"{n} short, brandable domain roots **without any TLD**.\\n\"\n",
" f\"Keywords: {kws}\\n\"\n",
" f\"{country_line}\\n\"\n",
" \"Constraints:\\n\"\n",
" \"- 12 syllables if possible\\n\"\n",
" \"- No hyphens/underscores/spaces\\n\"\n",
" \"- Avoid numbers unless they genuinely help memorability\\n\"\n",
" \"Output format: a JSON object with a single key 'domains' whose value is an array of strings.\"\n",
" )\n",
"\n",
"_valid_root = re.compile(r\"^[a-z][a-z0-9]{2,49}$\") # 350 chars, starts with letter\n",
"\n",
"def _sanitize_root(s: str) -> str:\n",
" s = s.strip().lower()\n",
" # remove anything after a dot (accidental TLDs)\n",
" s = s.split(\".\", 1)[0]\n",
" # drop spaces and hyphens just in case\n",
" s = s.replace(\" \", \"\").replace(\"-\", \"\")\n",
" # keep only az09\n",
" s = re.sub(r\"[^a-z0-9]\", \"\", s)\n",
" # ensure starts with letter\n",
" if s and not s[0].isalpha():\n",
" s = re.sub(r\"^[^a-z]+\", \"\", s)\n",
" return s\n",
"\n",
"def _unique_preserve_order(items: list[str]) -> list[str]:\n",
" seen = set()\n",
" out = []\n",
" for it in items:\n",
" if it not in seen:\n",
" seen.add(it)\n",
" out.append(it)\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a9138b6",
"metadata": {},
"outputs": [],
"source": [
"# --- Cell 4: Core generator function (Chat Completions)\n",
"\n",
"def generate_domain_ideas(\n",
" keywords: list[str],\n",
" target_country: Optional[str] = None,\n",
" n: int = 20,\n",
" *,\n",
" model: str = MODEL,\n",
" temperature: float = GENERATION_TEMPERATURE,\n",
") -> list[str]:\n",
" \"\"\"\n",
" Generate up to `n` domain ROOT ideas (no TLD).\n",
" - keywords: list of seed terms\n",
" - target_country: optional market hint (e.g., 'India', 'US', 'DE')\n",
" - n: number of ideas to return (150)\n",
" \"\"\"\n",
" if not keywords or not any(k.strip() for k in keywords):\n",
" raise ValueError(\"Provide at least one non-empty keyword.\")\n",
" if not (1 <= int(n) <= 50):\n",
" raise ValueError(\"`n` must be between 1 and 50.\")\n",
"\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
" {\"role\": \"user\", \"content\": _build_user_prompt([k.strip() for k in keywords if k.strip()], target_country, int(n))},\n",
" ]\n",
"\n",
" resp = openai.chat.completions.create(\n",
" model=model,\n",
" messages=messages,\n",
" temperature=temperature,\n",
" response_format={\"type\": \"json_object\"}, # ask for strict JSON\n",
" )\n",
"\n",
" content = resp.choices[0].message.content\n",
"\n",
" # Try to parse JSON; if it fails, fall back to naive extraction.\n",
" ideas: list[str] = []\n",
" try:\n",
" data = json.loads(content)\n",
" if isinstance(data, dict) and isinstance(data.get(\"domains\"), list):\n",
" ideas = [str(x) for x in data[\"domains\"]]\n",
" except Exception:\n",
" # Fallback: split lines / commas\n",
" raw = re.split(r\"[\\n,]+\", content)\n",
" ideas = [r for r in raw if r.strip()]\n",
"\n",
" # Sanitize, validate, dedupe, and enforce count\n",
" ideas = [_sanitize_root(x) for x in ideas]\n",
" ideas = [x for x in ideas if _valid_root.match(x)]\n",
" ideas = _unique_preserve_order(ideas)[: int(n)]\n",
"\n",
" return ideas\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b80c860",
"metadata": {},
"outputs": [],
"source": [
"example_keywords = [\"law\", \"gpt\", \"chatbot\", \"lawyer helper\"]\n",
"ideas = generate_domain_ideas(example_keywords, target_country=\"India\", n=20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b45c6382",
"metadata": {},
"outputs": [],
"source": [
"display(Markdown(\"## Domain ideas (no TLD)\\n\" + \"\\n\".join(f\"{i+1}. `{d}`\" for i, d in enumerate(ideas))))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llm-engineering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}