Merge pull request #795 from CosmusMutuku/week4-exercise
Cosmus_Week5_Exercise.ipynb (Andela GenAI Bootcamp)
This commit is contained in:
307
week5/community-contributions/Cosmus_Week5_Exercise.ipynb
Normal file
307
week5/community-contributions/Cosmus_Week5_Exercise.ipynb
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "d04a7c55",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Importing necessary libraries\n",
|
||||||
|
"import os\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from anthropic import Client\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"import sys\n",
|
||||||
|
"from faker import Faker\n",
|
||||||
|
"import random\n",
|
||||||
|
"import gradio as gr\n",
|
||||||
|
"from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
|
||||||
|
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||||
|
"from langchain_community.embeddings import HuggingFaceEmbeddings\n",
|
||||||
|
"from langchain_community.vectorstores import Chroma\n",
|
||||||
|
"from langchain_anthropic import ChatAnthropic\n",
|
||||||
|
"from langchain_classic.memory import ConversationBufferMemory\n",
|
||||||
|
"from langchain_classic.chains import ConversationalRetrievalChain\n",
|
||||||
|
"\n",
|
||||||
|
"!{sys.executable} -m pip install faker\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "3d7f8354",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"# loading the .env variables\n",
|
||||||
|
"load_dotenv(override=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# Force export to OS env so LangChain can detect it (had to try this because the key was not loading at some point but by the time i shared the code it loaded well so i commented it out)\n",
|
||||||
|
"#os.environ[\"ANTHROPIC_API_KEY\"] = os.getenv(\"ANTHROPIC_API_KEY\")\n",
|
||||||
|
"\n",
|
||||||
|
"#getting the key from the our .env file. It is Anthropic_API_KEY\n",
|
||||||
|
"ANTHROPIC_KEY = os.getenv(\"ANTHROPIC_API_KEY\")\n",
|
||||||
|
"client = Client(api_key=ANTHROPIC_KEY)\n",
|
||||||
|
"\n",
|
||||||
|
"# Checking the anthropic models list our anthropic key ca help us play with\n",
|
||||||
|
"models = client.models.list()\n",
|
||||||
|
"for model in models:\n",
|
||||||
|
" print(model.id)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "20d11d1c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Getting the python executable path on my notebook to know where to install the faker library\n",
|
||||||
|
"print(sys.executable)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "93a8f3ec",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Creating a fake person with faker\n",
|
||||||
|
"fake = Faker()\n",
|
||||||
|
"base_dir = \"knowledge_base\"\n",
|
||||||
|
"folders = [\"personal\", \"projects\", \"learning\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# We now create folders if they don't exist\n",
|
||||||
|
"for folder in folders:\n",
|
||||||
|
" os.makedirs(f\"{base_dir}/{folder}\", exist_ok=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# Check if data already exists\n",
|
||||||
|
"personal_file = f\"{base_dir}/personal/info.md\"\n",
|
||||||
|
"projects_file = f\"{base_dir}/projects/projects.md\"\n",
|
||||||
|
"learning_file = f\"{base_dir}/learning/learning.md\"\n",
|
||||||
|
"\n",
|
||||||
|
"#If the personal info file does not exist, create it\n",
|
||||||
|
"if not os.path.exists(personal_file):\n",
|
||||||
|
" name = fake.name()\n",
|
||||||
|
" profession = random.choice([\"Data Analyst\", \"Business Analyst\", \"Software Engineer\", \"AI Specialist\"])\n",
|
||||||
|
" bio = fake.paragraph(nb_sentences=5)\n",
|
||||||
|
" experience = \"\\n\".join([f\"- {fake.job()} at {fake.company()} ({fake.year()})\" for _ in range(3)])\n",
|
||||||
|
" \n",
|
||||||
|
" personal_text = f\"\"\"\n",
|
||||||
|
"# Personal Profile\n",
|
||||||
|
"Name: {name} \n",
|
||||||
|
"Profession: {profession} \n",
|
||||||
|
"\n",
|
||||||
|
"Bio: {bio}\n",
|
||||||
|
"\n",
|
||||||
|
"## Experience\n",
|
||||||
|
"{experience}\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
" with open(personal_file, \"w\") as f:\n",
|
||||||
|
" f.write(personal_text)\n",
|
||||||
|
" print(\"Personal info generated.\")\n",
|
||||||
|
"else:\n",
|
||||||
|
" #If the personal info file exists, skip the regeneration\n",
|
||||||
|
" print(\"ℹPersonal info already exists. Skipping regeneration.\")\n",
|
||||||
|
"\n",
|
||||||
|
"#doing the same for project file\n",
|
||||||
|
"if not os.path.exists(projects_file):\n",
|
||||||
|
" projects = \"\\n\".join([\n",
|
||||||
|
" f\"- **{fake.catch_phrase()}** — {fake.bs().capitalize()} for {fake.company()}.\"\n",
|
||||||
|
" for _ in range(5)\n",
|
||||||
|
" ])\n",
|
||||||
|
" projects_text = f\"\"\"\n",
|
||||||
|
"# Projects Portfolio\n",
|
||||||
|
"\n",
|
||||||
|
"Key Projects:\n",
|
||||||
|
"{projects}\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
" with open(projects_file, \"w\") as f:\n",
|
||||||
|
" f.write(projects_text)\n",
|
||||||
|
" print(\"Projects generated.\")\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"ℹProjects already exist. Skipping regeneration.\")\n",
|
||||||
|
"\n",
|
||||||
|
"#same thing for learning file\n",
|
||||||
|
"if not os.path.exists(learning_file):\n",
|
||||||
|
" topics = [\"LangChain\", \"RAG Systems\", \"Vector Databases\", \"AI Ethics\", \"Prompt Engineering\", \"Data Visualization\"]\n",
|
||||||
|
" learning = \"\\n\".join([\n",
|
||||||
|
" f\"- {random.choice(topics)} — {fake.sentence(nb_words=8)}\"\n",
|
||||||
|
" for _ in range(6)\n",
|
||||||
|
" ])\n",
|
||||||
|
" learning_text = f\"\"\"\n",
|
||||||
|
"# Learning Journey\n",
|
||||||
|
"\n",
|
||||||
|
"Recent Topics and Notes:\n",
|
||||||
|
"{learning}\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
" with open(learning_file, \"w\") as f:\n",
|
||||||
|
" f.write(learning_text)\n",
|
||||||
|
" print(\"Learning notes generated.\")\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"ℹLearning notes already exist. Skipping regeneration.\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6fa19091",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#loading the knowledge information from the knowledge_base folder\n",
|
||||||
|
"loader = DirectoryLoader(\"knowledge_base\", glob=\"**/*.md\", loader_cls=TextLoader)\n",
|
||||||
|
"documents = loader.load()\n",
|
||||||
|
"\n",
|
||||||
|
"#Splitting the documents into chunks\n",
|
||||||
|
"splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=80)\n",
|
||||||
|
"chunks = splitter.split_documents(documents)\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"Loaded {len(documents)} documents and created {len(chunks)} chunks.\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7b9fc9a5",
|
||||||
|
"metadata": {},
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6dcdec41",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Creating the embeddings\n",
|
||||||
|
"embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Chroma as the vector store\n",
|
||||||
|
"vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=\"chroma_db\")\n",
|
||||||
|
"vectorstore.persist()\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Vector store created and saved to 'chroma_db'.\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "99e4a99f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Check Langchain version as they updated the version recently thus making it difficult to use it successfullt\n",
|
||||||
|
"print(langchain.__version__)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "5dc1b6ce",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# The main Langchain Abstraction are: Memory, LLM, and Retriever\n",
|
||||||
|
"\n",
|
||||||
|
"# Memory for conversation history\n",
|
||||||
|
"memory = ConversationBufferMemory(\n",
|
||||||
|
" memory_key=\"chat_history\",\n",
|
||||||
|
" return_messages=True\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Using one of the Anthropic models from the list above to create the LLM\n",
|
||||||
|
"llm = ChatAnthropic(\n",
|
||||||
|
" model=\"claude-sonnet-4-5-20250929\",\n",
|
||||||
|
" temperature=0.6,\n",
|
||||||
|
" max_tokens=1024,\n",
|
||||||
|
" anthropic_api_key=ANTHROPIC_KEY\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Retriever from your vectorstore\n",
|
||||||
|
"retriever = vectorstore.as_retriever(search_kwargs={\"k\": 3})\n",
|
||||||
|
"\n",
|
||||||
|
"# Bringing everything together tConversational RAG Chain\n",
|
||||||
|
"conversation_chain = ConversationalRetrievalChain.from_llm(\n",
|
||||||
|
" llm=llm,\n",
|
||||||
|
" retriever=retriever,\n",
|
||||||
|
" memory=memory\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Anthropic conversational retriever is ready!\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6f93eea7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#fnc to create a chat interface\n",
|
||||||
|
"def chat(message, history):\n",
|
||||||
|
" if conversation_chain:\n",
|
||||||
|
" result = conversation_chain.invoke({\"question\": message})\n",
|
||||||
|
" return result[\"answer\"]\n",
|
||||||
|
" else:\n",
|
||||||
|
" # Retrieval-only fallback\n",
|
||||||
|
" docs = retriever.get_relevant_documents(message)\n",
|
||||||
|
" context = \"\\n\\n\".join([d.page_content for d in docs])\n",
|
||||||
|
" return f\"(Offline Mode)\\nTop relevant info:\\n\\n{context[:1000]}\"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "aadf91b4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#used som css to make the chat interface look better, and dark mode. I love dark mode btw\n",
|
||||||
|
"css = \"\"\"\n",
|
||||||
|
"body {background-color: #0f1117; color: #e6e6e6;}\n",
|
||||||
|
".gradio-container {background-color: #0f1117 !important;}\n",
|
||||||
|
"textarea, input, .wrap.svelte-1ipelgc {background-color: #1b1f2a !important; color: #ffffff !important;}\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"#Gradio blocks\n",
|
||||||
|
"with gr.Blocks(css=css, theme=\"gradio/monochrome\") as demo:\n",
|
||||||
|
" gr.Markdown(\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" <h2 style=\"color: #f5f5f5;\">Personal Knowledge Worker</h2>\n",
|
||||||
|
" <p style=\"color: #f5f5f5;\">Chat with your auto-generated knowledge base (Claude-powered if available)</p>\n",
|
||||||
|
" \"\"\",\n",
|
||||||
|
" elem_id=\"title\"\n",
|
||||||
|
" )\n",
|
||||||
|
" gr.ChatInterface(chat, type=\"messages\")\n",
|
||||||
|
"\n",
|
||||||
|
"demo.launch(inbrowser=True)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.13.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user