Merge pull request #770 from Tochi-Nwachukwu/main
[Bootcamp] - Tochi - Submission for Week 3 Exercise
This commit is contained in:
@@ -0,0 +1,351 @@
|
|||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": [],
|
||||||
|
"gpuType": "T4"
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
},
|
||||||
|
"accelerator": "GPU"
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Nigerian Names Dataset Generator\n",
|
||||||
|
"### Project Description\n",
|
||||||
|
"This is a fun project that lets you easily generate Nigerian names by selecting one of the popular tribes in Nigeria."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "AVN03AKGhOHf"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Installations"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "mzC6k8r9hz8T"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "WvBQWxyFWJFR",
|
||||||
|
"outputId": "ed902e29-cb0f-44fe-f714-6cfbf7584453"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 MB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||||
|
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m375.8/375.8 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||||
|
"\u001b[?25h"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!pip install -q --upgrade bitsandbytes accelerate gradio"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Imports"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "zyzd851bh64j"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Imports\n",
|
||||||
|
"import os\n",
|
||||||
|
"import requests\n",
|
||||||
|
"import json\n",
|
||||||
|
"from huggingface_hub import login\n",
|
||||||
|
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
|
||||||
|
"import torch\n",
|
||||||
|
"import gradio as gr\n",
|
||||||
|
"from google.colab import userdata\n",
|
||||||
|
"import gc"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "09JXEWAdWaNf"
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Logging into Huggingface"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "DK0MOMG2iAi0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"hf_token = userdata.get('HF_TOKEN')\n",
|
||||||
|
"login(hf_token, add_to_git_credential=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "19eMcQjoX9gq"
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"PHI = \"microsoft/Phi-4-mini-instruct\""
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "oqXIBtlaYLr8"
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### System Prompt and User Prompt Template"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "kBzXo-JPiHVF"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
"def generate_prompt(tribe, number):\n",
"    \"\"\"Build the system/user chat messages asking for `number` first names from `tribe`.\n",
"\n",
"    Returns a two-item list of {\"role\", \"content\"} dicts. Tribes without a known\n",
"    naming hint fall back to a generic description.\n",
"    \"\"\"\n",
"    known_hints = {\n",
"        \"Igbo\": 'names starting with \"Chi\" (God), \"Nneka\" (mother is supreme), \"Eze\" (king), and \"Nwa\" (child)',\n",
"        \"Yoruba\": 'names starting with \"Ade\" (crown), \"Olu\" (God/Lord), \"Ayo\" (joy), and \"Ife\" (love)',\n",
"        \"Hausa\": 'names like \"Aisha\", \"Fatima\", \"Muhammad\", and \"Ibrahim\" reflecting Islamic influence',\n",
"    }\n",
"    if tribe in known_hints:\n",
"        hint = known_hints[tribe]\n",
"    else:\n",
"        hint = \"meaningful translations and reflect the circumstances of birth, family values, or spiritual beliefs\"\n",
"\n",
"    # Rules are numbered when joined, so the list order is the prompt order.\n",
"    rules = [\n",
"        f\"Generate exactly {number} unique first names from the {tribe} tribe of Nigeria\",\n",
"        \"Never repeat any name in your list\",\n",
"        \"Provide only first names (no surnames or family names)\",\n",
"        f\"Use authentic {tribe} names with their traditional spellings\",\n",
"        \"Include a mix of male and female names unless otherwise specified\",\n",
"        \"Present the names in a simple numbered list format\",\n",
"        f\"After the list, you may optionally provide brief context about {tribe} naming traditions if requested\",\n",
"    ]\n",
"    numbered_rules = \"\\n\".join(\n",
"        f\"{position}. {rule}\" for position, rule in enumerate(rules, start=1)\n",
"    )\n",
"\n",
"    system_text = (\n",
"        f\"You are a Nigerian name generator specializing in {tribe} names. \"\n",
"        \"When asked to generate names, follow these rules:\\n\\n\"\n",
"        f\"{numbered_rules}\\n\\n\"\n",
"        f\"{tribe} names often have {hint}.\\n\\n\"\n",
"        f\"Ensure all names are culturally authentic and respectful of {tribe} heritage.\"\n",
"    )\n",
"    user_text = f\"Generate a list of {number} Nigerian {tribe} names\"\n",
"\n",
"    return [\n",
"        {\"role\": \"system\", \"content\": system_text},\n",
"        {\"role\": \"user\", \"content\": user_text},\n",
"    ]"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "xq8dGEiXYSdz"
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"quant_config = BitsAndBytesConfig(\n",
|
||||||
|
" load_in_4bit=True,\n",
|
||||||
|
" bnb_4bit_use_double_quant=True,\n",
|
||||||
|
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
||||||
|
" bnb_4bit_quant_type=\"nf4\"\n",
|
||||||
|
")"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "4IhI6PR4Yn8v"
|
||||||
|
},
|
||||||
|
"execution_count": 19,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
"def generate_names_interface(tribe, number):\n",
"    \"\"\"Generate `number` first names for `tribe` and return them as plain text.\n",
"\n",
"    Gradio click handler. Builds the chat prompt, loads the quantized PHI model,\n",
"    samples a completion and returns only the newly generated text. Failures are\n",
"    returned as an \"Error generating names: ...\" string rather than raised, so\n",
"    they appear in the output textbox.\n",
"    \"\"\"\n",
"    model = None\n",
"    try:\n",
"        messages = generate_prompt(tribe, number)\n",
"        tokenizer = AutoTokenizer.from_pretrained(PHI)\n",
"        tokenizer.pad_token = tokenizer.eos_token\n",
"        # return_dict=True yields input_ids together with the matching attention_mask.\n",
"        inputs = tokenizer.apply_chat_template(\n",
"            messages,\n",
"            return_tensors=\"pt\",\n",
"            return_dict=True,\n",
"            add_generation_prompt=True\n",
"        ).to(\"cuda\")\n",
"\n",
"        # Let the loader place the quantized weights instead of moving the\n",
"        # bitsandbytes model afterwards with .to().\n",
"        model = AutoModelForCausalLM.from_pretrained(\n",
"            PHI,\n",
"            device_map=\"auto\",\n",
"            quantization_config=quant_config\n",
"        )\n",
"        outputs = model.generate(\n",
"            **inputs,\n",
"            max_new_tokens=300,\n",
"            do_sample=True,\n",
"            temperature=0.7\n",
"        )\n",
"\n",
"        # generate() returns prompt + completion; drop the prompt tokens so the\n",
"        # system/user text is not echoed back to the user.\n",
"        prompt_length = inputs[\"input_ids\"].shape[-1]\n",
"        new_tokens = outputs[0][prompt_length:]\n",
"        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()\n",
"\n",
"    except Exception as e:\n",
"        return f\"Error generating names: {str(e)}\"\n",
"\n",
"    finally:\n",
"        # Release GPU memory on success and on failure alike.\n",
"        del model\n",
"        gc.collect()\n",
"        torch.cuda.empty_cache()"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "QPmegk3bZdHy"
|
||||||
|
},
|
||||||
|
"execution_count": 20,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"def create_interface():\n",
|
||||||
|
" with gr.Blocks(title=\"Nigerian Names Generator\", theme=gr.themes.Soft()) as demo:\n",
|
||||||
|
" gr.Markdown(\"# 🇳🇬 Nigerian Names Dataset Generator\")\n",
|
||||||
|
" gr.Markdown(\"Generate authentic Nigerian names from the Igbo, Yoruba, or Hausa tribes.\")\n",
|
||||||
|
"\n",
|
||||||
|
" with gr.Row():\n",
|
||||||
|
" with gr.Column():\n",
|
||||||
|
" tribe_dropdown = gr.Dropdown(\n",
|
||||||
|
" choices=[\"Igbo\", \"Yoruba\", \"Hausa\"],\n",
|
||||||
|
" label=\"Select Tribe\",\n",
|
||||||
|
" value=\"Igbo\",\n",
|
||||||
|
" info=\"Choose a Nigerian tribe\"\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" number_slider = gr.Slider(\n",
|
||||||
|
" minimum=1,\n",
|
||||||
|
" maximum=20,\n",
|
||||||
|
" step=1,\n",
|
||||||
|
" value=10,\n",
|
||||||
|
" label=\"Number of Names\",\n",
|
||||||
|
" info=\"How many names do you want to generate?\"\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" generate_btn = gr.Button(\"Generate Names\", variant=\"primary\", size=\"lg\")\n",
|
||||||
|
"\n",
|
||||||
|
" with gr.Column():\n",
|
||||||
|
" output_text = gr.Textbox(\n",
|
||||||
|
" label=\"Generated Names\",\n",
|
||||||
|
" lines=15,\n",
|
||||||
|
" placeholder=\"Your generated names will appear here...\",\n",
|
||||||
|
" show_copy_button=True\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" gr.Markdown(\"\"\"\n",
|
||||||
|
" ### About\n",
|
||||||
|
" This tool generates authentic Nigerian names based on traditional naming conventions:\n",
|
||||||
|
" - **Igbo**: Names often reflect spiritual beliefs (Chi - God, Eze - King)\n",
|
||||||
|
" - **Yoruba**: Names reflect circumstances of birth (Ade - Crown, Ayo - Joy)\n",
|
||||||
|
" - **Hausa**: Names often have Islamic influence\n",
|
||||||
|
" \"\"\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Connect the button to the function\n",
|
||||||
|
" generate_btn.click(\n",
|
||||||
|
" fn=generate_names_interface,\n",
|
||||||
|
" inputs=[tribe_dropdown, number_slider],\n",
|
||||||
|
" outputs=output_text\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" return demo"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "Svo24KUom4a5"
|
||||||
|
},
|
||||||
|
"execution_count": 22,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"demo = create_interface()\n",
|
||||||
|
"demo.launch(share=True)"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 612
|
||||||
|
},
|
||||||
|
"id": "b8fDedDHo7uk",
|
||||||
|
"outputId": "e05a7bf0-3953-4216-a0c0-515bb6d8be05"
|
||||||
|
},
|
||||||
|
"execution_count": 23,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
|
||||||
|
"* Running on public URL: https://a52406050690b5663b.gradio.live\n",
|
||||||
|
"\n",
|
||||||
|
"This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
],
|
||||||
|
"text/html": [
|
||||||
|
"<div><iframe src=\"https://a52406050690b5663b.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "execute_result",
|
||||||
|
"data": {
|
||||||
|
"text/plain": []
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"execution_count": 23
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user