Add Week 5 submission for muhammad_qasim_sheikh
This commit is contained in:
@@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7015d967",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.document_loaders import DirectoryLoader, TextLoader\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain_experimental.text_splitter import SemanticChunker\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"import gradio as gr\n",
|
||||
"import glob"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "87646db6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Read secrets from a local .env file (override=True lets .env win over any
# pre-existing process env vars) and pick up the OpenAI API key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
# Fail fast and loudly here rather than with an opaque auth error several
# cells later when the first OpenAI call is made.
if not api_key:
    print("Warning: OPENAI_API_KEY is not set; OpenAI calls below will fail.")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1019e3b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Collect every Markdown file under knowledge_base/ (recursively) into
# LangChain Documents, tagging each one with the name of its immediate
# parent directory as "doc_type" metadata.
documents = []
for md_path in glob.glob("knowledge_base/**/*.md", recursive=True):
    with open(md_path, "r", encoding="utf-8") as fh:
        content = fh.read()
    folder_name = os.path.basename(os.path.dirname(md_path))
    documents.append(
        Document(page_content=content, metadata={"doc_type": folder_name})
    )
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "54527a21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Embed with OpenAI's small embedding model and split the documents on
# semantic boundaries rather than fixed character counts.
# (Alternative splitter, previously tried:
#  RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=api_key)
chunks = SemanticChunker(embeddings).split_documents(documents)

print(f"Total number of chunks: {len(chunks)}")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "15579dda",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Build a Chroma vector store from the chunks and write it to disk.
# NOTE(review): re-running this cell against an existing "chroma_db"
# directory appends duplicate embeddings — delete the directory (or the
# collection) first for a clean rebuild; confirm desired behavior.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="chroma_db",
)
# NOTE(review): on Chroma >= 0.4 persistence is automatic and persist()
# is deprecated/no-op; kept here to preserve existing behavior.
vectorstore.persist()
print("Chroma vector store built.")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ca3b4d55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Wire up the conversational RAG chain: a deterministic (temperature=0)
# chat model, a buffer memory that accumulates the dialogue, and the
# Chroma store exposed as a retriever.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=api_key)
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
retriever = vectorstore.as_retriever()
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "94b3a75a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Smoke-test the chain with one question before wiring up the UI.
query = "Tell me about Langchain."
# .invoke() is the current Runnable/Chain API; calling the chain directly
# (conversation_chain({...})) has been deprecated since LangChain 0.1 and
# returns the same {"question", "chat_history", "answer"} dict.
result = conversation_chain.invoke({"question": query})

print("Answer:")
print(result["answer"])
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e814f910",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
def rag_chat(query, history):
    """Gradio callback: answer `query` via the conversational RAG chain.

    `history` is supplied by gr.ChatInterface but intentionally ignored —
    the chain carries its own ConversationBufferMemory across turns.
    """
    # .invoke() replaces the deprecated direct-call form chain({...}).
    response = conversation_chain.invoke({"question": query})
    return response["answer"]

# Assemble the chat UI around the RAG callback.
with gr.Blocks(theme=gr.themes.Soft()) as rag_ui:
    gr.Markdown("# RAG Chat Assistant")
    gr.Markdown("Ask questions about your Markdown knowledge base.")
    chat_box = gr.ChatInterface(
        fn=rag_chat,
        title="RAG Knowledge Base Assistant",
        description="Chat with your Markdown-based knowledge base using RAG."
    )
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eef8d2ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Start the Gradio app; share=True exposes a temporary public URL and
# debug=True blocks the cell while streaming server logs.
rag_ui.launch(share=True, debug=True)
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user