From 533c49b6e47396ac4f9c25b3e1e965f2eff6bfe4 Mon Sep 17 00:00:00 2001 From: twc-kwabena Date: Wed, 29 Oct 2025 08:57:50 -0400 Subject: [PATCH] kwabena_bootcamp --- .../kwabena/expert resume creator.ipynb | 511 ++++++++++++++++++ 1 file changed, 511 insertions(+) create mode 100644 week5/community-contributions/kwabena/expert resume creator.ipynb diff --git a/week5/community-contributions/kwabena/expert resume creator.ipynb b/week5/community-contributions/kwabena/expert resume creator.ipynb new file mode 100644 index 0000000..e431b26 --- /dev/null +++ b/week5/community-contributions/kwabena/expert resume creator.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "85b93c49", + "metadata": {}, + "source": [ + "# Expert Resume Creator" + ] + }, + { + "cell_type": "markdown", + "id": "8f90fe9a", + "metadata": {}, + "source": [ + " In this exercise, we'll build a RAG-powered resume refinement tool that helps tailor resumes to specific job descriptions.\n", + " \n", + " What We'll Build\n", + " An AI assistant that takes a job description and current resume, then produces an optimized version using resume writing best practices.\n", + " \n", + " The Approach (RAG)\n", + " 1. **Generate Knowledge Base** - Use an LLM to create expert resume writing guides\n", + " 2. **Create Vector Database** - Store the knowledge in Chroma for semantic search\n", + " 3. **Build Interface** - Create a Gradio app where users can refine their resumes\n", + " \n", + " Steps\n", + " - **STEP 1**: Generate synthetic resume writing knowledge using LLM\n", + " - **STEP 2**: Load documents and create RAG with Chroma vector database\n", + " - **STEP 3**: Build Gradio interface for users to input job description and resume\n", + " \n", + " ---\n", + " \n", + " Let's get started! 
🚀" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f889c1d", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import glob\n", + "from dotenv import load_dotenv\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3711bc34", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import DirectoryLoader, TextLoader\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.schema import Document\n", + "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", + "from langchain_chroma import Chroma\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "from langchain.embeddings import HuggingFaceEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "840999d8", + "metadata": {}, + "outputs": [], + "source": [ + "# Configuration\n", + "MODEL = \"gpt-4o-mini\"\n", + "db_name = \"resume_vector_db\"\n", + "KNOWLEDGE_BASE_DIR = \"resume-knowledge-base\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4695238c", + "metadata": {}, + "outputs": [], + "source": [ + "#load environment variables\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "markdown", + "id": "37ce61e4", + "metadata": {}, + "source": [ + "### STEP 1 - Programmatically Generate Synthetic Resume Knowledge Base" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6257788", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_content_with_llm(topic, category):\n", + " \"\"\"Use LLM to generate content for a specific topic\"\"\"\n", + " \n", + " llm = ChatOpenAI(temperature=0.8, model_name=MODEL)\n", + " \n", + " prompts = {\n", + " \"best-practices\": f\"\"\"You are 
an expert resume writer and career coach. Write a comprehensive guide about: {topic}\n", + "\n", + " Create a detailed markdown document with:\n", + " - Clear section headers\n", + " - Specific, actionable advice\n", + " - Multiple concrete examples\n", + " - Do's and don'ts\n", + " - Real-world tips that hiring managers look for\n", + "\n", + " Write 500-800 words in markdown format. Be specific and practical.\"\"\",\n", + " \n", + " \"industry-specific\": f\"\"\"You are an expert resume writer specializing in {topic} industry resumes.\n", + "\n", + " Write a comprehensive industry guide covering:\n", + " - Key skills and technologies to highlight for {topic} roles\n", + " - How to structure experience for this industry\n", + " - Important keywords and terminology\n", + " - 5-8 example bullet points showing strong achievements with specific metrics\n", + " - Common mistakes to avoid\n", + " - What hiring managers in {topic} look for\n", + "\n", + " Write 600-900 words in markdown format with specific examples.\"\"\",\n", + " \n", + " \"examples\": f\"\"\"You are an expert resume writer. Create detailed examples for: {topic}\n", + "\n", + " Provide:\n", + " - 3-4 complete, realistic examples showing proper formatting\n", + " - Each example should include company name, dates, and 4-6 bullet points\n", + " - Bullet points must include quantified achievements (numbers, percentages, dollar amounts)\n", + " - Show variety in roles (junior, mid-level, senior)\n", + " - Use strong action verbs\n", + " - Demonstrate clear impact and results\n", + "\n", + " Write in markdown format. 
Make examples realistic and impressive.\"\"\",\n", + " \n", + " \"specialized\": f\"\"\"You are an expert in resume writing for {topic}.\n", + "\n", + " Create a comprehensive guide covering:\n", + " - Unique considerations for {topic}\n", + " - Best practices and formatting tips\n", + " - 6-10 strong example bullet points with metrics\n", + " - Common questions and how to address them\n", + " - What makes a standout resume in this area\n", + "\n", + " Write 500-700 words in markdown format.\"\"\"\n", + " }\n", + " \n", + " prompt = prompts.get(category, prompts[\"best-practices\"])\n", + " response = llm.invoke(prompt)\n", + " return response.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a3e0c62", + "metadata": {}, + "outputs": [], + "source": [ + "def create_resume_knowledge_base():\n", + " \"\"\"Programmatically generate comprehensive resume knowledge base using LLM\"\"\"\n", + " \n", + " print(\"🤖 Starting LLM-powered knowledge base generation...\")\n", + " print(\"⏳ This may take 2-3 minutes to generate all content...\\n\")\n", + " \n", + " # Create directory structure\n", + " os.makedirs(f\"{KNOWLEDGE_BASE_DIR}/best-practices\", exist_ok=True)\n", + " os.makedirs(f\"{KNOWLEDGE_BASE_DIR}/examples\", exist_ok=True)\n", + " os.makedirs(f\"{KNOWLEDGE_BASE_DIR}/industry-specific\", exist_ok=True)\n", + " os.makedirs(f\"{KNOWLEDGE_BASE_DIR}/specialized\", exist_ok=True)\n", + " \n", + " # Define topics for each category\n", + " topics = {\n", + " \"best-practices\": [\n", + " \"Resume Formatting and Structure\",\n", + " \"Powerful Action Verbs and Keywords\",\n", + " \"Quantifying Achievements and Impact\",\n", + " \"Tailoring Resume to Job Descriptions\",\n", + " \"ATS (Applicant Tracking System) Optimization\",\n", + " \"Common Resume Mistakes to Avoid\"\n", + " ],\n", + " \"industry-specific\": [\n", + " \"Software Engineering and Technology\",\n", + " \"Data Science and Machine Learning\",\n", + " \"Business and 
Marketing\",\n", + " \"Finance and Accounting\",\n", + " \"Healthcare and Medical\",\n", + " \"Product Management\"\n", + " ],\n", + " \"examples\": [\n", + " \"Strong Experience Section Examples\",\n", + " \"Skills Section Formatting\",\n", + " \"Project Descriptions for Technical Roles\",\n", + " \"Leadership and Management Achievements\",\n", + " \"Entry-Level Resume Examples\"\n", + " ],\n", + " \"specialized\": [\n", + " \"Career Changers and Transitions\",\n", + " \"Recent Graduates and Internships\",\n", + " \"Executive and C-Level Resumes\",\n", + " \"Freelance and Contract Work\",\n", + " \"Career Gaps and Explanations\"\n", + " ]\n", + " }\n", + " \n", + " total_files = sum(len(topic_list) for topic_list in topics.values())\n", + " current_file = 0\n", + " \n", + " # Generate content for each category and topic\n", + " for category, topic_list in topics.items():\n", + " for topic in topic_list:\n", + " current_file += 1\n", + " print(f\"[{current_file}/{total_files}] Generating: {category}/{topic}...\")\n", + " \n", + " try:\n", + " # Generate content using LLM\n", + " content = generate_content_with_llm(topic, category)\n", + " \n", + " # Create filename from topic\n", + " filename = topic.lower().replace(\" \", \"-\").replace(\"(\", \"\").replace(\")\", \"\") + \".md\"\n", + " filepath = f\"{KNOWLEDGE_BASE_DIR}/{category}/{filename}\"\n", + " \n", + " # Add title to content\n", + " full_content = f\"# {topic}\\n\\n{content}\"\n", + " \n", + " # Write to file\n", + " with open(filepath, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(full_content)\n", + " \n", + " print(f\" ✅ Saved to {category}/{filename}\")\n", + " \n", + " except Exception as e:\n", + " print(f\" ❌ Error generating {topic}: {str(e)}\")\n", + " continue\n", + " \n", + " print(f\"\\n✅ Knowledge base generation complete!\")\n", + " print(f\"📁 Created {total_files} files across 4 categories:\")\n", + " print(f\" - {len(topics['best-practices'])} best practice guides\")\n", + " 
print(f\" - {len(topics['industry-specific'])} industry-specific guides\")\n", + " print(f\" - {len(topics['examples'])} example collections\")\n", + " print(f\" - {len(topics['specialized'])} specialized guides\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7257b2b", + "metadata": {}, + "outputs": [], + "source": [ + "# Run this to create the knowledge base\n", + "create_resume_knowledge_base()" + ] + }, + { + "cell_type": "markdown", + "id": "292a8d84", + "metadata": {}, + "source": [ + "### Load and Process Documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8c18a52", + "metadata": {}, + "outputs": [], + "source": [ + " # Read in documents using LangChain's loaders\n", + "folders = glob.glob(f\"{KNOWLEDGE_BASE_DIR}/*\")\n", + "\n", + "def add_metadata(doc, doc_type):\n", + " doc.metadata[\"doc_type\"] = doc_type\n", + " return doc\n", + "\n", + "text_loader_kwargs = {'encoding': 'utf-8'}\n", + "\n", + "documents = []\n", + "for folder in folders:\n", + " doc_type = os.path.basename(folder)\n", + " loader = DirectoryLoader(folder, glob=\"**/*.md\", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)\n", + " folder_docs = loader.load()\n", + " documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])\n", + "\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "chunks = text_splitter.split_documents(documents)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "567829d5", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Total number of chunks: {len(chunks)}\")\n", + "print(f\"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "12e5dfb1", + "metadata": {}, + "source": [ + "Create Vector Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94239c9d", + "metadata": {}, + "outputs": [], + "source": [ + "# Using 
OpenAI embeddings (you can switch to HuggingFace for free alternative)\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "# Alternative free option:\n", + "# embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n", + "\n", + "# Delete if already exists\n", + "if os.path.exists(db_name):\n", + " Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()\n", + "\n", + "# Create vectorstore\n", + "vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)\n", + "print(f\"✅ Vectorstore created with {vectorstore._collection.count()} documents\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "62554189", + "metadata": {}, + "source": [ + "Set up RAG Chain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2b349f5", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "llm = ChatOpenAI(temperature=0.7, model_name=MODEL)\n", + "\n", + "# Alternative - use Ollama locally:\n", + "# llm = ChatOpenAI(temperature=0.7, model_name='llama3.2', base_url='http://localhost:11434/v1', api_key='ollama')\n", + "\n", + "memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)\n", + "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 10})\n", + "conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)\n", + "\n", + "print(\"✅ RAG chain configured and ready\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "280ad157", + "metadata": {}, + "source": [ + "Create Resume Refinement Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f54e5573", + "metadata": {}, + "outputs": [], + "source": [ + "def refine_resume(job_description, current_resume, history=None):\n", + " \"\"\"\n", + " Refines a resume based on job description using RAG knowledge base\n", + " \"\"\"\n", + " # Reset memory for each new refinement\n", + " global 
conversation_chain, memory, llm, retriever\n", + " memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)\n", + " conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)\n", + " \n", + " prompt = f\"\"\"You are an expert resume writer with access to best practices and successful examples.\n", + "\n", + " JOB DESCRIPTION:\n", + " {job_description}\n", + "\n", + " CURRENT RESUME:\n", + " {current_resume}\n", + "\n", + " Please analyze the current resume and provide a refined version that:\n", + " 1. Aligns keywords and skills with the job description\n", + " 2. Uses strong action verbs and quantified achievements\n", + " 3. Follows formatting best practices\n", + " 4. Highlights most relevant experience for this role\n", + " 5. Removes or de-emphasizes less relevant information\n", + "\n", + " Provide the refined resume in a clear, professional format. Also include a brief \"KEY IMPROVEMENTS\" section at the end explaining the main changes you made and why.\n", + " \"\"\"\n", + " \n", + " result = conversation_chain.invoke({\"question\": prompt})\n", + " return result[\"answer\"]\n" + ] + }, + { + "cell_type": "markdown", + "id": "4efdfe8b", + "metadata": {}, + "source": [ + "Create Gradio Interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dacb51de", + "metadata": {}, + "outputs": [], + "source": [ + "def create_gradio_interface():\n", + " with gr.Blocks(title=\"Expert Resume Creator\") as interface:\n", + " gr.Markdown(\"# 📄 Expert Resume Creator\")\n", + " gr.Markdown(\"Refine your resume using AI-powered best practices and tailored optimization\")\n", + " \n", + " with gr.Row():\n", + " with gr.Column():\n", + " job_desc_input = gr.Textbox(\n", + " label=\"Job Description\",\n", + " placeholder=\"Paste the job description here...\",\n", + " lines=10\n", + " )\n", + " resume_input = gr.Textbox(\n", + " label=\"Your Current Resume\",\n", + " 
placeholder=\"Paste your current resume here...\",\n", + " lines=15\n", + " )\n", + " submit_btn = gr.Button(\"✨ Refine My Resume\", variant=\"primary\", size=\"lg\")\n", + " \n", + " with gr.Column():\n", + " output = gr.Textbox(\n", + " label=\"Refined Resume\",\n", + " lines=30,\n", + " show_copy_button=True\n", + " )\n", + " \n", + " gr.Markdown(\"### 💡 Tips\")\n", + " gr.Markdown(\"\"\"\n", + " - Include complete job description with requirements and responsibilities\n", + " - Paste your full resume including experience, education, and skills\n", + " - The AI will optimize your resume to match the job requirements\n", + " - Review the KEY IMPROVEMENTS section to understand the changes\n", + " \"\"\")\n", + " \n", + " submit_btn.click(\n", + " fn=refine_resume,\n", + " inputs=[job_desc_input, resume_input],\n", + " outputs=output\n", + " )\n", + " \n", + " return interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e01ddd13", + "metadata": {}, + "outputs": [], + "source": [ + "# Launch the interface\n", + "interface = create_gradio_interface()\n", + "interface.launch(inbrowser=True, share=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}