From 2de41e45cc3a5a6d98da7ec39ef2a0bdcfe1957d Mon Sep 17 00:00:00 2001 From: Tochi-Nwachukwu Date: Tue, 21 Oct 2025 13:06:53 +0100 Subject: [PATCH] Submission for Week 3 Exercise --- ...ise_nigerian_names_dateset_generator.ipynb | 351 ++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 week3/community-contributions/tochi/week_3_exercise_nigerian_names_dateset_generator.ipynb diff --git a/week3/community-contributions/tochi/week_3_exercise_nigerian_names_dateset_generator.ipynb b/week3/community-contributions/tochi/week_3_exercise_nigerian_names_dateset_generator.ipynb new file mode 100644 index 0000000..21a5f3a --- /dev/null +++ b/week3/community-contributions/tochi/week_3_exercise_nigerian_names_dateset_generator.ipynb @@ -0,0 +1,351 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Nigerian Names Dataset Generator\n", + "### Project Description\n", + "This is a fun project that allows you to easily generate Nigerian names by selecting one of the popular tribes in Nigeria." 
+ ], + "metadata": { + "id": "AVN03AKGhOHf" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Installations" + ], + "metadata": { + "id": "mzC6k8r9hz8T" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WvBQWxyFWJFR", + "outputId": "ed902e29-cb0f-44fe-f714-6cfbf7584453" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 MB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m375.8/375.8 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -q --upgrade bitsandbytes accelerate gradio" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Imports" + ], + "metadata": { + "id": "zyzd851bh64j" + } + }, + { + "cell_type": "code", + "source": [ + "# Imports\n", + "import os\n", + "import requests\n", + "import json\n", + "from huggingface_hub import login\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n", + "import torch\n", + "import gradio as gr\n", + "from google.colab import userdata\n", + "import gc" + ], + "metadata": { + "id": "09JXEWAdWaNf" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Logging into Huggingface" + ], + "metadata": { + "id": "DK0MOMG2iAi0" + } + }, + { + "cell_type": "code", + "source": [ + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ], + "metadata": { + "id": "19eMcQjoX9gq" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "PHI = \"microsoft/Phi-4-mini-instruct\"" + ], + "metadata": { + "id": "oqXIBtlaYLr8" + }, + "execution_count": 4, + 
"outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### System Prompt and User prompt Template" + ], + "metadata": { + "id": "kBzXo-JPiHVF" + } + }, + { + "cell_type": "code", + "source": [ + "def generate_prompt(tribe, number):\n", + " tribe_patterns = {\n", + " \"Igbo\": 'names starting with \"Chi\" (God), \"Nneka\" (mother is supreme), \"Eze\" (king), and \"Nwa\" (child)',\n", + " \"Yoruba\": 'names starting with \"Ade\" (crown), \"Olu\" (God/Lord), \"Ayo\" (joy), and \"Ife\" (love)',\n", + " \"Hausa\": 'names like \"Aisha\", \"Fatima\", \"Muhammad\", and \"Ibrahim\" reflecting Islamic influence',\n", + " }\n", + " naming_pattern = tribe_patterns.get(\n", + " tribe,\n", + " \"meaningful translations and reflect the circumstances of birth, family values, or spiritual beliefs\"\n", + " )\n", + "\n", + " system_prompt = f\"\"\"You are a Nigerian name generator specializing in {tribe} names. When asked to generate names, follow these rules:\n", + "\n", + "1. Generate exactly {number} unique first names from the {tribe} tribe of Nigeria\n", + "2. Never repeat any name in your list\n", + "3. Provide only first names (no surnames or family names)\n", + "4. Use authentic {tribe} names with their traditional spellings\n", + "5. Include a mix of male and female names unless otherwise specified\n", + "6. Present the names in a simple numbered list format\n", + "7. 
After the list, you may optionally provide brief context about {tribe} naming traditions if requested\n", + "\n", + "{tribe} names often have {naming_pattern}.\n", + "\n", + "Ensure all names are culturally authentic and respectful of {tribe} heritage.\"\"\"\n", + "\n", + " messages = [\n", + "\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": f\"Generate a list of {number} Nigerian {tribe} names\"}\n", + " ]\n", + "\n", + " return messages" + ], + "metadata": { + "id": "xq8dGEiXYSdz" + }, + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\"\n", + ")" + ], + "metadata": { + "id": "4IhI6PR4Yn8v" + }, + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def generate_names_interface(tribe, number):\n", + "\n", + " try:\n", + " messages = generate_prompt(tribe, number)\n", + " tokenizer = AutoTokenizer.from_pretrained(PHI)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " input_ids = tokenizer.apply_chat_template(\n", + " messages,\n", + " return_tensors=\"pt\",\n", + " add_generation_prompt=True\n", + " ).to(\"cuda\")\n", + "\n", + " attention_mask = torch.ones_like(input_ids, dtype=torch.long, device=\"cuda\")\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " PHI,\n", + " quantization_config=quant_config\n", + " ).to(\"cuda\")\n", + " outputs = model.generate(\n", + " input_ids=input_ids,\n", + " attention_mask=attention_mask,\n", + " max_new_tokens=300,\n", + " do_sample=True,\n", + " temperature=0.7\n", + " )\n", + "\n", + " generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + " # Extract only the assistant's response\n", + " if \"<|assistant|>\" in generated_text:\n", + " result = 
generated_text.split(\"<|assistant|>\")[-1].strip()\n", + " else:\n", + " result = generated_text\n", + "\n", + " del model\n", + " torch.cuda.empty_cache()\n", + " gc.collect()\n", + "\n", + " return result\n", + "\n", + " except Exception as e:\n", + " return f\"Error generating names: {str(e)}\"" + ], + "metadata": { + "id": "QPmegk3bZdHy" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def create_interface():\n", + " with gr.Blocks(title=\"Nigerian Names Generator\", theme=gr.themes.Soft()) as demo:\n", + " gr.Markdown(\"# 🇳🇬 Nigerian Names Dataset Generator\")\n", + " gr.Markdown(\"Generate authentic Nigerian names from the Igbo, Yoruba, or Hausa tribes.\")\n", + "\n", + " with gr.Row():\n", + " with gr.Column():\n", + " tribe_dropdown = gr.Dropdown(\n", + " choices=[\"Igbo\", \"Yoruba\", \"Hausa\"],\n", + " label=\"Select Tribe\",\n", + " value=\"Igbo\",\n", + " info=\"Choose a Nigerian tribe\"\n", + " )\n", + "\n", + " number_slider = gr.Slider(\n", + " minimum=1,\n", + " maximum=20,\n", + " step=1,\n", + " value=10,\n", + " label=\"Number of Names\",\n", + " info=\"How many names do you want to generate?\"\n", + " )\n", + "\n", + " generate_btn = gr.Button(\"Generate Names\", variant=\"primary\", size=\"lg\")\n", + "\n", + " with gr.Column():\n", + " output_text = gr.Textbox(\n", + " label=\"Generated Names\",\n", + " lines=15,\n", + " placeholder=\"Your generated names will appear here...\",\n", + " show_copy_button=True\n", + " )\n", + "\n", + " gr.Markdown(\"\"\"\n", + " ### About\n", + " This tool generates authentic Nigerian names based on traditional naming conventions:\n", + " - **Igbo**: Names often reflect spiritual beliefs (Chi - God, Eze - King)\n", + " - **Yoruba**: Names reflect circumstances of birth (Ade - Crown, Ayo - Joy)\n", + " - **Hausa**: Names often have Islamic influence\n", + " \"\"\")\n", + "\n", + " # Connect the button to the function\n", + " generate_btn.click(\n", + " 
fn=generate_names_interface,\n", + " inputs=[tribe_dropdown, number_slider],\n", + " outputs=output_text\n", + " )\n", + "\n", + " return demo" + ], + "metadata": { + "id": "Svo24KUom4a5" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "demo = create_interface()\n", + "demo.launch(share=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 612 + }, + "id": "b8fDedDHo7uk", + "outputId": "e05a7bf0-3953-4216-a0c0-515bb6d8be05" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n", + "* Running on public URL: https://a52406050690b5663b.gradio.live\n", + "\n", + "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [] + }, + "metadata": {}, + "execution_count": 23 + } + ] + } + ] +} \ No newline at end of file