{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7b1c2a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install faker into the kernel's own environment BEFORE the import cell runs.\n",
    "# (Previously the install came after `from faker import Faker` in the same cell,\n",
    "# so a fresh environment failed on the import and never reached the install.)\n",
    "%pip install -q faker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d04a7c55",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Importing necessary libraries (standard library first, then third-party)\n",
    "import os\n",
    "import random\n",
    "import sys\n",
    "from importlib.metadata import PackageNotFoundError, version\n",
    "\n",
    "import gradio as gr\n",
    "from anthropic import Client\n",
    "from dotenv import load_dotenv\n",
    "from faker import Faker\n",
    "from langchain_anthropic import ChatAnthropic\n",
    "from langchain_classic.chains import ConversationalRetrievalChain\n",
    "from langchain_classic.memory import ConversationBufferMemory\n",
    "from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
    "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_text_splitters import CharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d7f8354",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the variables from the .env file, overriding any stale values already in the environment\n",
    "load_dotenv(override=True)\n",
    "\n",
    "# Force export to OS env so LangChain can detect it (needed at one point when the key was not loading; it loads fine now, so this stays commented out)\n",
    "#os.environ[\"ANTHROPIC_API_KEY\"] = os.getenv(\"ANTHROPIC_API_KEY\")\n",
    "\n",
    "# Read the key from our .env file. The variable name is ANTHROPIC_API_KEY\n",
    "ANTHROPIC_KEY = os.getenv(\"ANTHROPIC_API_KEY\")\n",
    "\n",
    "if ANTHROPIC_KEY:\n",
    "    client = Client(api_key=ANTHROPIC_KEY)\n",
    "\n",
    "    # List the Anthropic models our key can help us play with\n",
    "    models = client.models.list()\n",
    "    for model in models:\n",
    "        print(model.id)\n",
    "else:\n",
    "    # Keep going without a key: chat() falls back to retrieval-only answers\n",
    "    print(\"ANTHROPIC_API_KEY not found. The chat will run in retrieval-only offline mode.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20d11d1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Python executable used by this notebook's kernel, to confirm which environment the packages live in\n",
    "print(sys.executable)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93a8f3ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Creating a fake person with faker\n",
    "fake = Faker()\n",
    "base_dir = \"knowledge_base\"\n",
    "folders = [\"personal\", \"projects\", \"learning\"]\n",
    "\n",
    "# Create the folders if they don't exist yet\n",
    "for folder in folders:\n",
    "    os.makedirs(f\"{base_dir}/{folder}\", exist_ok=True)\n",
    "\n",
    "personal_file = f\"{base_dir}/personal/info.md\"\n",
    "projects_file = f\"{base_dir}/projects/projects.md\"\n",
    "learning_file = f\"{base_dir}/learning/learning.md\"\n",
    "\n",
    "\n",
    "def build_personal_text():\n",
    "    \"\"\"Return a markdown profile (name, profession, bio, experience) for a fake person.\"\"\"\n",
    "    name = fake.name()\n",
    "    profession = random.choice([\"Data Analyst\", \"Business Analyst\", \"Software Engineer\", \"AI Specialist\"])\n",
    "    bio = fake.paragraph(nb_sentences=5)\n",
    "    experience = \"\\n\".join([f\"- {fake.job()} at {fake.company()} ({fake.year()})\" for _ in range(3)])\n",
    "    return f\"\"\"\n",
    "# Personal Profile\n",
    "Name: {name} \n",
    "Profession: {profession} \n",
    "\n",
    "Bio: {bio}\n",
    "\n",
    "## Experience\n",
    "{experience}\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def build_projects_text():\n",
    "    \"\"\"Return a markdown list of five fake projects.\"\"\"\n",
    "    projects = \"\\n\".join([\n",
    "        f\"- **{fake.catch_phrase()}** — {fake.bs().capitalize()} for {fake.company()}.\"\n",
    "        for _ in range(5)\n",
    "    ])\n",
    "    return f\"\"\"\n",
    "# Projects Portfolio\n",
    "\n",
    "Key Projects:\n",
    "{projects}\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def build_learning_text():\n",
    "    \"\"\"Return a markdown list of six learning notes on randomly chosen topics.\"\"\"\n",
    "    topics = [\"LangChain\", \"RAG Systems\", \"Vector Databases\", \"AI Ethics\", \"Prompt Engineering\", \"Data Visualization\"]\n",
    "    learning = \"\\n\".join([\n",
    "        f\"- {random.choice(topics)} — {fake.sentence(nb_words=8)}\"\n",
    "        for _ in range(6)\n",
    "    ])\n",
    "    return f\"\"\"\n",
    "# Learning Journey\n",
    "\n",
    "Recent Topics and Notes:\n",
    "{learning}\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def write_if_missing(path, build_text, created_msg, exists_msg):\n",
    "    \"\"\"Write build_text() to path unless the file already exists, and print which case applied.\n",
    "\n",
    "    build_text is only called when the file is missing, so existing data is never regenerated.\n",
    "    \"\"\"\n",
    "    if os.path.exists(path):\n",
    "        print(exists_msg)\n",
    "        return\n",
    "    with open(path, \"w\") as f:\n",
    "        f.write(build_text())\n",
    "    print(created_msg)\n",
    "\n",
    "\n",
    "write_if_missing(\n",
    "    personal_file,\n",
    "    build_personal_text,\n",
    "    \"Personal info generated.\",\n",
    "    \"ℹPersonal info already exists. Skipping regeneration.\",\n",
    ")\n",
    "write_if_missing(\n",
    "    projects_file,\n",
    "    build_projects_text,\n",
    "    \"Projects generated.\",\n",
    "    \"ℹProjects already exist. Skipping regeneration.\",\n",
    ")\n",
    "write_if_missing(\n",
    "    learning_file,\n",
    "    build_learning_text,\n",
    "    \"Learning notes generated.\",\n",
    "    \"ℹLearning notes already exist. Skipping regeneration.\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fa19091",
   "metadata": {},
   "outputs": [],
   "source": [
    "#loading the knowledge information from the knowledge_base folder\n",
    "loader = DirectoryLoader(base_dir, glob=\"**/*.md\", loader_cls=TextLoader)\n",
    "documents = loader.load()\n",
    "\n",
    "#Splitting the documents into chunks\n",
    "splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=80)\n",
    "chunks = splitter.split_documents(documents)\n",
    "\n",
    "print(f\"Loaded {len(documents)} documents and created {len(chunks)} chunks.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b9fc9a5",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6dcdec41",
   "metadata": {},
   "outputs": [],
   "source": [
    "db_dir = \"chroma_db\"\n",
    "\n",
    "#Creating the embeddings\n",
    "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
    "\n",
    "# from_documents() adds to whatever is already stored in db_dir, so re-running this cell\n",
    "# used to insert the same chunks again. Drop the collection from any previous run first.\n",
    "if os.path.exists(db_dir):\n",
    "    Chroma(persist_directory=db_dir, embedding_function=embeddings).delete_collection()\n",
    "\n",
    "# Chroma as the vector store\n",
    "vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=db_dir)\n",
    "\n",
    "# persist() is deprecated in recent langchain-community releases; only call it while it still exists\n",
    "if hasattr(vectorstore, \"persist\"):\n",
    "    vectorstore.persist()\n",
    "\n",
    "print(\"Vector store created and saved to 'chroma_db'.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99e4a99f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Check the installed LangChain package versions, since a recent release made it harder to use successfully.\n",
    "#Read from package metadata so this works even when the top-level `langchain` package is not installed.\n",
    "for package in (\"langchain\", \"langchain-core\", \"langchain-classic\", \"langchain-community\", \"langchain-anthropic\"):\n",
    "    try:\n",
    "        print(f\"{package}: {version(package)}\")\n",
    "    except PackageNotFoundError:\n",
    "        print(f\"{package}: not installed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5dc1b6ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The main LangChain abstractions are: Memory, LLM, and Retriever\n",
    "\n",
    "# Retriever from the vector store (top 3 chunks per question)\n",
    "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 3})\n",
    "\n",
    "# Stays None when no API key is available, which is what lets chat() use its offline fallback\n",
    "conversation_chain = None\n",
    "\n",
    "if ANTHROPIC_KEY:\n",
    "    # Memory for conversation history\n",
    "    memory = ConversationBufferMemory(\n",
    "        memory_key=\"chat_history\",\n",
    "        return_messages=True\n",
    "    )\n",
    "\n",
    "    # Using one of the Anthropic models from the list above to create the LLM\n",
    "    llm = ChatAnthropic(\n",
    "        model=\"claude-sonnet-4-5-20250929\",\n",
    "        temperature=0.6,\n",
    "        max_tokens=1024,\n",
    "        anthropic_api_key=ANTHROPIC_KEY\n",
    "    )\n",
    "\n",
    "    # Bringing everything together: the conversational RAG chain\n",
    "    conversation_chain = ConversationalRetrievalChain.from_llm(\n",
    "        llm=llm,\n",
    "        retriever=retriever,\n",
    "        memory=memory\n",
    "    )\n",
    "    print(\"Anthropic conversational retriever is ready!\")\n",
    "else:\n",
    "    print(\"No Anthropic API key found. Running in retrieval-only offline mode.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f93eea7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#fnc to create a chat interface\n",
    "def chat(message, history):\n",
    "    \"\"\"Answer `message` for the Gradio chat UI.\n",
    "\n",
    "    Uses the conversational RAG chain when one was built; otherwise returns the\n",
    "    first 1000 characters of the top retrieved chunks. `history` is passed in by\n",
    "    Gradio but is not used here: the chain is given its own memory object.\n",
    "    \"\"\"\n",
    "    if conversation_chain is not None:\n",
    "        result = conversation_chain.invoke({\"question\": message})\n",
    "        return result[\"answer\"]\n",
    "\n",
    "    # Retrieval-only fallback (invoke() replaces the deprecated get_relevant_documents())\n",
    "    docs = retriever.invoke(message)\n",
    "    context = \"\\n\\n\".join(d.page_content for d in docs)\n",
    "    return f\"(Offline Mode)\\nTop relevant info:\\n\\n{context[:1000]}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aadf91b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "#used some css to make the chat interface look better, and dark mode. I love dark mode btw\n",
    "css = \"\"\"\n",
    "body {background-color: #0f1117; color: #e6e6e6;}\n",
    ".gradio-container {background-color: #0f1117 !important;}\n",
    "textarea, input, .wrap.svelte-1ipelgc {background-color: #1b1f2a !important; color: #ffffff !important;}\n",
    "\"\"\"\n",
    "\n",
    "#Gradio blocks\n",
    "with gr.Blocks(css=css, theme=\"gradio/monochrome\") as demo:\n",
    "    gr.Markdown(\n",
    "        \"\"\"\n",
    "        Chat with your auto-generated knowledge base (Claude-powered if available)\n",
    "        \"\"\",\n",
    "        elem_id=\"title\"\n",
    "    )\n",
    "    gr.ChatInterface(chat, type=\"messages\")\n",
    "\n",
    "demo.launch(inbrowser=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}