From 998d04f8a374d3ce741fafc38ddf45d45bebb6d4 Mon Sep 17 00:00:00 2001
From: aashahid <sp21-bcs-034@cuilahore.edu.pk>
Date: Tue, 28 Oct 2025 23:19:41 +0500
Subject: [PATCH] Add Week 3 submission for muhammad_qasim_sheikh

---
 .../Day 5/synthetic_data_generator.ipynb      | 172 ++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 community-contributions/muhammad_qasim_sheikh/Week 3/Day 5/synthetic_data_generator.ipynb

diff --git a/community-contributions/muhammad_qasim_sheikh/Week 3/Day 5/synthetic_data_generator.ipynb b/community-contributions/muhammad_qasim_sheikh/Week 3/Day 5/synthetic_data_generator.ipynb
new file mode 100644
index 0000000..ac3a30a
--- /dev/null
+++ b/community-contributions/muhammad_qasim_sheikh/Week 3/Day 5/synthetic_data_generator.ipynb	
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "236461b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "from dotenv import load_dotenv\n",
+    "import gradio as gr\n",
+    "import json\n",
+    "from openai import OpenAI\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4c493ebf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_dotenv(override=True)\n",
+    "api_key = os.getenv('OPENAI_API_KEY')\n",
+    "    \n",
+    "client = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "349fa758",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system_prompt = \"\"\"\n",
+    "    You are an expert technical writer and knowledge engineer.\n",
+    "    Your task is to generate well-structured Markdown (.md) documentation files that can be used as a knowledge base for a RAG.\n",
+    "\n",
+    "    Follow these rules carefully:\n",
+    "    1. Write the content in clear, concise Markdown format.\n",
+    "    2. Use appropriate Markdown headers (#, ##, ###) to structure the document.\n",
+    "    3. Include lists, tables, or code blocks only when necessary.\n",
+    "    4. Keep each document self-contained and focused on a single topic.\n",
+    "    5. Do not include any text outside the Markdown content (no explanations, no code fences).\n",
+    "    6. The style should be factual, structured, and helpful for machine retrieval.\n",
+    "    7. Use consistent tone and terminology across sections.\n",
+    "    \"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "e65071d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_kb_prompt(topic, kb_type=\"tutorial\"):\n",
+    "    return f\"\"\"\n",
+    "    Generate a comprehensive Markdown document for the following technical topic.\n",
+    "    Topic: {topic}\n",
+    "    Document Type: {kb_type}\n",
+    "    The document should include structured sections, concise explanations, and clear formatting suitable for a technical knowledge base.\n",
+    "    \"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "1045db44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_markdown_doc(topic, kb_type=\"tutorial\"):\n",
+    "    \"\"\"Request a Markdown document about `topic` (of type `kb_type`) from the chat model and return its text, minus any wrapping code fence.\"\"\"\n",
+    "    user_prompt = create_kb_prompt(topic, kb_type)\n",
+    "    messages = [\n",
+    "        {\"role\": \"system\", \"content\": system_prompt},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt},\n",
+    "    ]\n",
+    "    # Single-turn request: only the system prompt and the freshly built user prompt are sent.\n",
+    "    response = client.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=messages,\n",
+    "        temperature=0.7\n",
+    "    )\n",
+    "    markdown_output = (response.choices[0].message.content or \"\").strip()  # content may be None; fall back to empty text\n",
+    "    # Unwrap only a fence that encloses the whole reply, so code blocks inside the document are kept.\n",
+    "    fenced = re.fullmatch(r'```[a-zA-Z]*\\s+(.*?)\\s*```', markdown_output, flags=re.DOTALL)\n",
+    "    return fenced.group(1) if fenced else markdown_output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24ba021b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_kb_gradio_interface():\n",
+    "    \"\"\"Build and return the Gradio Blocks UI: topic and document-type inputs, a generate button, and a read-only output box.\"\"\"\n",
+    "    with gr.Blocks(theme=gr.themes.Soft()) as demo:\n",
+    "        gr.Markdown(\"## Technical Knowledge Base Generator\")\n",
+    "        with gr.Row():\n",
+    "            # First column: the two inputs and the button that triggers generation.\n",
+    "            with gr.Column():\n",
+    "                topic_box = gr.Textbox(\n",
+    "                    lines=2,\n",
+    "                    label=\"Technical Topic\",\n",
+    "                    placeholder=\"e.g., Building a RAG pipeline with LangChain...\",\n",
+    "                )\n",
+    "                doc_type_radio = gr.Radio(\n",
+    "                    choices=[\"Overview\", \"FAQ\", \"Use Case\"],\n",
+    "                    value=\"FAQ\",\n",
+    "                    label=\"Document Type\",\n",
+    "                )\n",
+    "                run_button = gr.Button(\"Generate Markdown Document\", variant=\"primary\")\n",
+    "            # Second column: non-editable box that receives the generated text.\n",
+    "            with gr.Column():\n",
+    "                result_box = gr.Textbox(\n",
+    "                    interactive=False,\n",
+    "                    lines=25,\n",
+    "                    label=\"Generated Markdown Content\",\n",
+    "                    placeholder=\"Generated Markdown will appear here...\",\n",
+    "                )\n",
+    "        # Clicking the button calls generate_markdown_doc(topic, document type) and shows its return value.\n",
+    "        run_button.click(\n",
+    "            api_name=\"generate_kb_doc\",\n",
+    "            fn=generate_markdown_doc,\n",
+    "            inputs=[topic_box, doc_type_radio],\n",
+    "            outputs=[result_box],\n",
+    "        )\n",
+    "    return demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db17cde4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app = create_kb_gradio_interface()\n",
+    "app.launch(debug=True, share=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-engineering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}