From c94a3f13c358fe966f17f0a69d9cb27af15e9e27 Mon Sep 17 00:00:00 2001 From: Nik Date: Tue, 21 Oct 2025 00:16:01 +0530 Subject: [PATCH] Adding Domain Name Generator based on keyword ideas and target location using openai chat completion api --- .../domain_name_generator.ipynb | 239 ++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 week1/community-contributions/domain_name_generator/domain_name_generator.ipynb diff --git a/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb b/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb new file mode 100644 index 0000000..029691d --- /dev/null +++ b/week1/community-contributions/domain_name_generator/domain_name_generator.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1633a440", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Week 1 Assignment: LLM Engineering\n", + "Author: Nikhil Raut\n", + "\n", + "Notebook: domain_name_generator.ipynb\n", + "\n", + "Purpose:\n", + "Generate short, memorable domain root ideas (no TLD) from keywords using an OpenAI Chat Completions system+user prompt.\n", + "\n", + "Quick setup:\n", + "1) pip install openai python-dotenv ipython\n", + "2) Add OPENAI_API_KEY to a .env file in the project root\n", + "\n", + "How to use (Python script):\n", + "from domain_name_generator import generate_domain_ideas\n", + "ideas = generate_domain_ideas([\"fitness\", \"coach\", \"wellness\"], target_country=\"India\", n=20)\n", + "print(ideas)\n", + "\n", + "How to use (Notebook):\n", + "# after running config/client cells\n", + "generate_domain_ideas([\"fintech\", \"pay\"], target_country=\"US\", n=15)\n", + "\n", + "Notes:\n", + "- n: 1-50 (returns list[str] of TLD-less roots)\n", + "- Adjust MODEL and temperature in the config cell or function args\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da528fbe", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from dataclasses import dataclass, field\n", + "from typing import List, Dict, Tuple\n", + "from openai import OpenAI\n", + "from dotenv import load_dotenv\n", + "from IPython.display import Markdown, display\n", + "import json\n", + "import re\n", + "from typing import Optional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "519674b2", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Cell 2: Config & Client\n", + "\n", + "# Load environment (.env should contain OPENAI_API_KEY)\n", + "load_dotenv()\n", + "\n", + "# Initialize OpenAI client (relies on OPENAI_API_KEY)\n", + "openai = OpenAI()\n", + "\n", + "# Model constants (feel free to change to another chat model)\n", + "MODEL = \"gpt-4o-mini\"\n", + "\n", + "# Deterministic-ish by default; raise temperature for wilder ideas.\n", + "GENERATION_TEMPERATURE = 0.8\n", + "SCORING_TEMPERATURE = 0.2\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd20c262", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "SYSTEM_PROMPT = \"\"\"You are a helpful brand-naming assistant.\n", + "Generate **domain ROOT** ideas (no TLD like .com) that are:\n", + "- short (ideally 5–12 chars), pronounceable, and memorable\n", + "- alphanumeric only (no spaces or hyphens), start with a letter\n", + "- avoid famous trademarks and sensitive terms\n", + "- diverse styles: blends, portmanteau, slight misspellings, synonyms\n", + "Return ONLY valid JSON: {\"domains\": [\"idea1\", \"idea2\", ...]}\"\"\"\n", + "\n", + "def _build_user_prompt(keywords: list[str], target_country: Optional[str], n: int) -> str:\n", + " kws = \", \".join(keywords)\n", + " country_line = f\"Target country/market: {target_country}\" if target_country else \"Target country/market: (general/global)\"\n", + " return (\n", + " \"Given the keywords below, propose exactly \"\n", + " f\"{n} short, brandable domain roots **without any TLD**.\\n\"\n", + " f\"Keywords: {kws}\\n\"\n", + " f\"{country_line}\\n\"\n", + " \"Constraints:\\n\"\n", + " \"- 1–2 syllables if possible\\n\"\n", + " \"- No hyphens/underscores/spaces\\n\"\n", + " \"- Avoid numbers unless they genuinely help memorability\\n\"\n", + " \"Output format: a JSON object with a single key 'domains' whose value is an array of strings.\"\n", + " )\n", + "\n", + "_valid_root = re.compile(r\"^[a-z][a-z0-9]{2,49}$\") # 3–50 chars, starts with letter\n", + "\n", + "def _sanitize_root(s: str) -> str:\n", + " s = s.strip().lower()\n", + " # remove anything after a dot (accidental TLDs)\n", + " s = s.split(\".\", 1)[0]\n", + " # drop spaces and hyphens just in case\n", + " s = s.replace(\" \", \"\").replace(\"-\", \"\")\n", + " # keep only a–z0–9\n", + " s = re.sub(r\"[^a-z0-9]\", \"\", s)\n", + " # ensure starts with letter\n", + " if s and not s[0].isalpha():\n", + " s = re.sub(r\"^[^a-z]+\", \"\", s)\n", + " return s\n", + "\n", + "def _unique_preserve_order(items: list[str]) -> list[str]:\n", + " seen = set()\n", + " out = []\n", + " for it in items:\n", + " if it not in seen:\n", + " seen.add(it)\n", + " out.append(it)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a9138b6", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Cell 4: Core generator function (Chat Completions)\n", + "\n", + "def generate_domain_ideas(\n", + " keywords: list[str],\n", + " target_country: Optional[str] = None,\n", + " n: int = 20,\n", + " *,\n", + " model: str = MODEL,\n", + " temperature: float = GENERATION_TEMPERATURE,\n", + ") -> list[str]:\n", + " \"\"\"\n", + " Generate up to `n` domain ROOT ideas (no TLD).\n", + " - keywords: list of seed terms\n", + " - target_country: optional market hint (e.g., 'India', 'US', 'DE')\n", + " - n: number of ideas to return (1–50)\n", + " \"\"\"\n", + " if not keywords or not any(k.strip() for k in keywords):\n", + " raise ValueError(\"Provide at least one non-empty keyword.\")\n", + " if not (1 <= int(n) <= 50):\n", + " raise ValueError(\"`n` must be between 1 and 50.\")\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": _build_user_prompt([k.strip() for k in keywords if k.strip()], target_country, int(n))},\n", + " ]\n", + "\n", + " resp = openai.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " temperature=temperature,\n", + " response_format={\"type\": \"json_object\"}, # ask for strict JSON\n", + " )\n", + "\n", + " content = resp.choices[0].message.content\n", + "\n", + " # Try to parse JSON; if it fails, fall back to naive extraction.\n", + " ideas: list[str] = []\n", + " try:\n", + " data = json.loads(content)\n", + " if isinstance(data, dict) and isinstance(data.get(\"domains\"), list):\n", + " ideas = [str(x) for x in data[\"domains\"]]\n", + " except Exception:\n", + " # Fallback: split lines / commas\n", + " raw = re.split(r\"[\\n,]+\", content)\n", + " ideas = [r for r in raw if r.strip()]\n", + "\n", + " # Sanitize, validate, dedupe, and enforce count\n", + " ideas = [_sanitize_root(x) for x in ideas]\n", + " ideas = [x for x in ideas if _valid_root.match(x)]\n", + " ideas = _unique_preserve_order(ideas)[: int(n)]\n", + "\n", + " return ideas\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b80c860", + "metadata": {}, + "outputs": [], + "source": [ + "example_keywords = [\"law\", \"gpt\", \"chatbot\", \"lawyer helper\"]\n", + "ideas = generate_domain_ideas(example_keywords, target_country=\"India\", n=20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b45c6382", + "metadata": {}, + "outputs": [], + "source": [ + "display(Markdown(\"## Domain ideas (no TLD)\\n\" + \"\\n\".join(f\"{i+1}. `{d}`\" for i, d in enumerate(ideas))))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm-engineering", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}