diff --git a/week5/community-contributions/Cosmus_Week5_Exercise.ipynb b/week5/community-contributions/Cosmus_Week5_Exercise.ipynb
new file mode 100644
index 0000000..ef3da6f
--- /dev/null
+++ b/week5/community-contributions/Cosmus_Week5_Exercise.ipynb
@@ -0,0 +1,307 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d04a7c55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install faker into the active kernel before importing it\n",
+    "import sys\n",
+    "!{sys.executable} -m pip install faker\n",
+    "\n",
+    "# Import the necessary libraries\n",
+    "import os\n",
+    "import random\n",
+    "from dotenv import load_dotenv\n",
+    "from anthropic import Client\n",
+    "from faker import Faker\n",
+    "import gradio as gr\n",
+    "from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
+    "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
+    "from langchain_community.vectorstores import Chroma\n",
+    "from langchain_anthropic import ChatAnthropic\n",
+    "from langchain_classic.memory import ConversationBufferMemory\n",
+    "from langchain_classic.chains import ConversationalRetrievalChain\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d7f8354",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the .env variables\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "# Force-export to the OS environment so LangChain can detect the key.\n",
+    "# (Kept for reference: the key was not loading at one point, but it loads\n",
+    "# fine now, so the line stays commented out.)\n",
+    "# os.environ[\"ANTHROPIC_API_KEY\"] = os.getenv(\"ANTHROPIC_API_KEY\")\n",
+    "\n",
+    "# Read ANTHROPIC_API_KEY from the .env file\n",
+    "ANTHROPIC_KEY = os.getenv(\"ANTHROPIC_API_KEY\")\n",
+    "client = Client(api_key=ANTHROPIC_KEY)\n",
+    "\n",
+    "# List the Anthropic models this key can access\n",
+    "models = client.models.list()\n",
+    "for model in models:\n",
+    "    print(model.id)\n"
+   ]
+  },
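+  {
+   "cell_type": "markdown",
+   "id": "a1f0c2d3",
+   "metadata": {},
+   "source": [
+    "Optional sanity check: one minimal `messages.create` call confirms the key works before we build anything on top of it. The model id below is simply the one used later in this notebook; substitute any id printed above.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2e1d3c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal smoke test of the Anthropic key (optional)\n",
+    "response = client.messages.create(\n",
+    "    model=\"claude-sonnet-4-5-20250929\",  # any model id printed above works\n",
+    "    max_tokens=50,\n",
+    "    messages=[{\"role\": \"user\", \"content\": \"Say hello in five words.\"}]\n",
+    ")\n",
+    "print(response.content[0].text)\n"
+   ]
+  },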
+ "\n", + "## Experience\n", + "{experience}\n", + "\"\"\"\n", + " with open(personal_file, \"w\") as f:\n", + " f.write(personal_text)\n", + " print(\"Personal info generated.\")\n", + "else:\n", + " #If the personal info file exists, skip the regeneration\n", + " print(\"ℹPersonal info already exists. Skipping regeneration.\")\n", + "\n", + "#doing the same for project file\n", + "if not os.path.exists(projects_file):\n", + " projects = \"\\n\".join([\n", + " f\"- **{fake.catch_phrase()}** — {fake.bs().capitalize()} for {fake.company()}.\"\n", + " for _ in range(5)\n", + " ])\n", + " projects_text = f\"\"\"\n", + "# Projects Portfolio\n", + "\n", + "Key Projects:\n", + "{projects}\n", + "\"\"\"\n", + " with open(projects_file, \"w\") as f:\n", + " f.write(projects_text)\n", + " print(\"Projects generated.\")\n", + "else:\n", + " print(\"ℹProjects already exist. Skipping regeneration.\")\n", + "\n", + "#same thing for learning file\n", + "if not os.path.exists(learning_file):\n", + " topics = [\"LangChain\", \"RAG Systems\", \"Vector Databases\", \"AI Ethics\", \"Prompt Engineering\", \"Data Visualization\"]\n", + " learning = \"\\n\".join([\n", + " f\"- {random.choice(topics)} — {fake.sentence(nb_words=8)}\"\n", + " for _ in range(6)\n", + " ])\n", + " learning_text = f\"\"\"\n", + "# Learning Journey\n", + "\n", + "Recent Topics and Notes:\n", + "{learning}\n", + "\"\"\"\n", + " with open(learning_file, \"w\") as f:\n", + " f.write(learning_text)\n", + " print(\"Learning notes generated.\")\n", + "else:\n", + " print(\"ℹLearning notes already exist. Skipping regeneration.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fa19091", + "metadata": {}, + "outputs": [], + "source": [ + "#loading the knowledge information from the knowledge_base folder\n", + "loader = DirectoryLoader(\"knowledge_base\", glob=\"**/*.md\", loader_cls=TextLoader)\n", + "documents = loader.load()\n", + "\n", + "#Splitting the documents into chunks\n", + "splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=80)\n", + "chunks = splitter.split_documents(documents)\n", + "\n", + "print(f\"Loaded {len(documents)} documents and created {len(chunks)} chunks.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7b9fc9a5", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dcdec41", + "metadata": {}, + "outputs": [], + "source": [ + "#Creating the embeddings\n", + "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n", + "\n", + "# Chroma as the vector store\n", + "vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=\"chroma_db\")\n", + "vectorstore.persist()\n", + "\n", + "print(\"Vector store created and saved to 'chroma_db'.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99e4a99f", + "metadata": {}, + "outputs": [], + "source": [ + "#Check Langchain version as they updated the version recently thus making it difficult to use it successfullt\n", + "print(langchain.__version__)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5dc1b6ce", + "metadata": {}, + "outputs": [], + "source": [ + "# The main Langchain Abstraction are: Memory, LLM, and Retriever\n", + "\n", + "# Memory for conversation history\n", + "memory = ConversationBufferMemory(\n", + " memory_key=\"chat_history\",\n", + " return_messages=True\n", + ")\n", + "\n", + "# Using one of the Anthropic models from the list above to create the LLM\n", + "llm = 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99e4a99f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check the LangChain version - recent releases moved the legacy chains and\n",
+    "# memory into langchain_classic, which made the old imports fail\n",
+    "import langchain\n",
+    "print(langchain.__version__)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5dc1b6ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The main LangChain abstractions here are: Memory, LLM, and Retriever\n",
+    "\n",
+    "# Memory for conversation history\n",
+    "memory = ConversationBufferMemory(\n",
+    "    memory_key=\"chat_history\",\n",
+    "    return_messages=True\n",
+    ")\n",
+    "\n",
+    "# Use one of the Anthropic models from the list above as the LLM\n",
+    "llm = ChatAnthropic(\n",
+    "    model=\"claude-sonnet-4-5-20250929\",\n",
+    "    temperature=0.6,\n",
+    "    max_tokens=1024,\n",
+    "    anthropic_api_key=ANTHROPIC_KEY\n",
+    ")\n",
+    "\n",
+    "# Retriever over the vector store\n",
+    "retriever = vectorstore.as_retriever(search_kwargs={\"k\": 3})\n",
+    "\n",
+    "# Bring everything together in a Conversational RAG chain\n",
+    "conversation_chain = ConversationalRetrievalChain.from_llm(\n",
+    "    llm=llm,\n",
+    "    retriever=retriever,\n",
+    "    memory=memory\n",
+    ")\n",
+    "\n",
+    "print(\"Anthropic conversational retriever is ready!\")\n"
+   ]
+  },
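+  {
+   "cell_type": "markdown",
+   "id": "e5b4a6f7",
+   "metadata": {},
+   "source": [
+    "Optional end-to-end check: one direct `invoke` on the chain verifies that retrieval and generation work together before the UI goes up. The question is just an example.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6c5b7a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-off test of the full RAG chain (optional)\n",
+    "result = conversation_chain.invoke({\"question\": \"What is this person's profession?\"})\n",
+    "print(result[\"answer\"])\n"
+   ]
+  },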
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f93eea7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Chat function backing the Gradio interface\n",
+    "def chat(message, history):\n",
+    "    if conversation_chain:\n",
+    "        result = conversation_chain.invoke({\"question\": message})\n",
+    "        return result[\"answer\"]\n",
+    "    else:\n",
+    "        # Retrieval-only fallback if the chain is unavailable\n",
+    "        docs = retriever.invoke(message)\n",
+    "        context = \"\\n\\n\".join([d.page_content for d in docs])\n",
+    "        return f\"(Offline Mode)\\nTop relevant info:\\n\\n{context[:1000]}\"\n"
+   ]
+  },
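+  {
+   "cell_type": "markdown",
+   "id": "a7d6c8b9",
+   "metadata": {},
+   "source": [
+    "The function can also be exercised directly before launching the UI; the question below is just an example (the chain keeps its own memory, so the history argument is unused here).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8e7d9c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Call the chat function directly, outside Gradio (optional)\n",
+    "print(chat(\"Which projects are in the portfolio?\", []))\n"
+   ]
+  },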
\n", + " \"\"\",\n", + " elem_id=\"title\"\n", + " )\n", + " gr.ChatInterface(chat, type=\"messages\")\n", + "\n", + "demo.launch(inbrowser=True)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}