Add domain name generator based on keyword ideas and target location, using the OpenAI Chat Completions API
This commit is contained in:
@@ -0,0 +1,239 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1633a440",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\"\"\"\n",
|
||||||
|
"Week 1 Assignment: LLM Engineering\n",
|
||||||
|
"Author: Nikhil Raut\n",
|
||||||
|
"\n",
|
||||||
|
"Notebook: domain_name_generator.ipynb\n",
|
||||||
|
"\n",
|
||||||
|
"Purpose:\n",
|
||||||
|
"Generate short, memorable domain root ideas (no TLD) from keywords using an OpenAI Chat Completions system+user prompt.\n",
|
||||||
|
"\n",
|
||||||
|
"Quick setup:\n",
|
||||||
|
"1) pip install openai python-dotenv ipython\n",
|
||||||
|
"2) Add OPENAI_API_KEY to a .env file in the project root\n",
|
||||||
|
"\n",
|
||||||
|
"How to use (Python script, after exporting this notebook to domain_name_generator.py):\n",
|
||||||
|
"from domain_name_generator import generate_domain_ideas\n",
|
||||||
|
"ideas = generate_domain_ideas([\"fitness\", \"coach\", \"wellness\"], target_country=\"India\", n=20)\n",
|
||||||
|
"print(ideas)\n",
|
||||||
|
"\n",
|
||||||
|
"How to use (Notebook):\n",
|
||||||
|
"# after running config/client cells\n",
|
||||||
|
"generate_domain_ideas([\"fintech\", \"pay\"], target_country=\"US\", n=15)\n",
|
||||||
|
"\n",
|
||||||
|
"Notes:\n",
|
||||||
|
"- n: 1-50; returns a list[str] of at most n TLD-less roots (invalid or duplicate ideas are dropped)\n",
|
||||||
|
"- Adjust MODEL and temperature in the config cell or function args\n",
|
||||||
|
"\"\"\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "da528fbe",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"from dataclasses import dataclass, field\n",
|
||||||
|
"from typing import List, Dict, Tuple\n",
|
||||||
|
"from openai import OpenAI\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from IPython.display import Markdown, display\n",
|
||||||
|
"import json\n",
|
||||||
|
"import re\n",
|
||||||
|
"from typing import Optional"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "519674b2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# --- Cell 2: Config & Client\n",
|
||||||
|
"\n",
|
||||||
|
"# Load environment (.env should contain OPENAI_API_KEY)\n",
|
||||||
|
"load_dotenv()\n",
|
||||||
|
"\n",
|
||||||
|
"# Initialize OpenAI client (relies on OPENAI_API_KEY)\n",
|
||||||
|
"openai = OpenAI()\n",
|
||||||
|
"\n",
|
||||||
|
"# Model constants (feel free to change to another chat model)\n",
|
||||||
|
"MODEL = \"gpt-4o-mini\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Temperatures: generation is deliberately creative (raise it for wilder ideas); the scoring value is kept low for more consistent output.\n",
|
||||||
|
"GENERATION_TEMPERATURE = 0.8\n",
|
||||||
|
"SCORING_TEMPERATURE = 0.2\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cd20c262",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"SYSTEM_PROMPT = \"\"\"You are a helpful brand-naming assistant.\n",
|
||||||
|
"Generate **domain ROOT** ideas (no TLD like .com) that are:\n",
|
||||||
|
"- short (ideally 5–12 chars), pronounceable, and memorable\n",
|
||||||
|
"- alphanumeric only (no spaces or hyphens), start with a letter\n",
|
||||||
|
"- avoid famous trademarks and sensitive terms\n",
|
||||||
|
"- diverse styles: blends, portmanteau, slight misspellings, synonyms\n",
|
||||||
|
"Return ONLY valid JSON: {\"domains\": [\"idea1\", \"idea2\", ...]}\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"def _build_user_prompt(keywords: list[str], target_country: Optional[str], n: int) -> str:\n",
|
||||||
|
" kws = \", \".join(keywords)\n",
|
||||||
|
" country_line = f\"Target country/market: {target_country}\" if target_country else \"Target country/market: (general/global)\"\n",
|
||||||
|
" return (\n",
|
||||||
|
" \"Given the keywords below, propose exactly \"\n",
|
||||||
|
" f\"{n} short, brandable domain roots **without any TLD**.\\n\"\n",
|
||||||
|
" f\"Keywords: {kws}\\n\"\n",
|
||||||
|
" f\"{country_line}\\n\"\n",
|
||||||
|
" \"Constraints:\\n\"\n",
|
||||||
|
" \"- 1–2 syllables if possible\\n\"\n",
|
||||||
|
" \"- No hyphens/underscores/spaces\\n\"\n",
|
||||||
|
" \"- Avoid numbers unless they genuinely help memorability\\n\"\n",
|
||||||
|
" \"Output format: a JSON object with a single key 'domains' whose value is an array of strings.\"\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"_valid_root = re.compile(r\"^[a-z][a-z0-9]{2,49}$\") # 3–50 chars, starts with letter\n",
|
||||||
|
"\n",
|
||||||
|
"def _sanitize_root(s: str) -> str:\n",
|
||||||
|
" s = s.strip().lower()\n",
|
||||||
|
" # remove anything after a dot (accidental TLDs)\n",
|
||||||
|
" s = s.split(\".\", 1)[0]\n",
|
||||||
|
" # drop spaces and hyphens just in case\n",
|
||||||
|
" s = s.replace(\" \", \"\").replace(\"-\", \"\")\n",
|
||||||
|
" # keep only a–z0–9\n",
|
||||||
|
" s = re.sub(r\"[^a-z0-9]\", \"\", s)\n",
|
||||||
|
" # ensure starts with letter\n",
|
||||||
|
" if s and not s[0].isalpha():\n",
|
||||||
|
" s = re.sub(r\"^[^a-z]+\", \"\", s)\n",
|
||||||
|
" return s\n",
|
||||||
|
"\n",
|
||||||
|
"def _unique_preserve_order(items: list[str]) -> list[str]:\n",
|
||||||
|
" seen = set()\n",
|
||||||
|
" out = []\n",
|
||||||
|
" for it in items:\n",
|
||||||
|
" if it not in seen:\n",
|
||||||
|
" seen.add(it)\n",
|
||||||
|
" out.append(it)\n",
|
||||||
|
" return out"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "2a9138b6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# --- Cell 4: Core generator function (Chat Completions)\n",
|
||||||
|
"\n",
|
||||||
|
"def generate_domain_ideas(\n",
|
||||||
|
" keywords: list[str],\n",
|
||||||
|
" target_country: Optional[str] = None,\n",
|
||||||
|
" n: int = 20,\n",
|
||||||
|
" *,\n",
|
||||||
|
" model: str = MODEL,\n",
|
||||||
|
" temperature: float = GENERATION_TEMPERATURE,\n",
|
||||||
|
") -> list[str]:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Generate up to `n` domain ROOT ideas (no TLD).\n",
|
||||||
|
" - keywords: list of seed terms\n",
|
||||||
|
" - target_country: optional market hint (e.g., 'India', 'US', 'DE')\n",
|
||||||
|
" - n: maximum number of ideas to return (1–50); fewer may come back after validation and de-duplication\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" if not keywords or not any(k.strip() for k in keywords):\n",
|
||||||
|
" raise ValueError(\"Provide at least one non-empty keyword.\")\n",
|
||||||
|
" if not (1 <= int(n) <= 50):\n",
|
||||||
|
" raise ValueError(\"`n` must be between 1 and 50.\")\n",
|
||||||
|
"\n",
|
||||||
|
" messages = [\n",
|
||||||
|
" {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
|
||||||
|
" {\"role\": \"user\", \"content\": _build_user_prompt([k.strip() for k in keywords if k.strip()], target_country, int(n))},\n",
|
||||||
|
" ]\n",
|
||||||
|
"\n",
|
||||||
|
" resp = openai.chat.completions.create(\n",
|
||||||
|
" model=model,\n",
|
||||||
|
" messages=messages,\n",
|
||||||
|
" temperature=temperature,\n",
|
||||||
|
" response_format={\"type\": \"json_object\"}, # ask for strict JSON\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" content = resp.choices[0].message.content\n",
|
||||||
|
"\n",
|
||||||
|
" # Try to parse JSON; if it fails, fall back to naive extraction.\n",
|
||||||
|
" ideas: list[str] = []\n",
|
||||||
|
" try:\n",
|
||||||
|
" data = json.loads(content)\n",
|
||||||
|
" if isinstance(data, dict) and isinstance(data.get(\"domains\"), list):\n",
|
||||||
|
" ideas = [str(x) for x in data[\"domains\"]]\n",
|
||||||
|
" except Exception:\n",
|
||||||
|
" # Fallback: split lines / commas\n",
|
||||||
|
" raw = re.split(r\"[\\n,]+\", content)\n",
|
||||||
|
" ideas = [r for r in raw if r.strip()]\n",
|
||||||
|
"\n",
|
||||||
|
" # Sanitize, validate, dedupe, and enforce count\n",
|
||||||
|
" ideas = [_sanitize_root(x) for x in ideas]\n",
|
||||||
|
" ideas = [x for x in ideas if _valid_root.match(x)]\n",
|
||||||
|
" ideas = _unique_preserve_order(ideas)[: int(n)]\n",
|
||||||
|
"\n",
|
||||||
|
" return ideas\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0b80c860",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"example_keywords = [\"law\", \"gpt\", \"chatbot\", \"lawyer helper\"]\n",
|
||||||
|
"ideas = generate_domain_ideas(example_keywords, target_country=\"India\", n=20)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b45c6382",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"display(Markdown(\"## Domain ideas (no TLD)\\n\" + \"\\n\".join(f\"{i+1}. `{d}`\" for i, d in enumerate(ideas))))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "llm-engineering",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user