fixed memory method of langchain

2025-10-24 12:14:48 +05:30
parent fe122c223d
commit 54d717fbd2
1 changed files with 57 additions and 43 deletions
--- a/week5/community-contributions/bharat_puri/files_based_knowledge_base.ipynb
+++ b/week5/community-contributions/bharat_puri/files_based_knowledge_base.ipynb
@@ -44,17 +44,30 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "id": "78743444-cae7-4fad-bf66-dcfabbe73335",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
-    "!pip install -U -q imapclient langchain langchain-openai langchain-chroma langchain-community langchain-core langchain-text-splitters langchain-huggingface chromadb sentence-transformers"
+    "# !pip install -U imapclient langchain langchain-openai langchain-chroma langchain-community langchain-core langchain-text-splitters langchain-huggingface chromadb sentence-transformers"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
+   "id": "71924170-e73a-4e98-a34a-c5c0567f39da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Install pinned versions of LangChain and its companion packages so future releases do not break this notebook\n",
+    "!pip install -U -q imapclient langchain==1.0.2 langchain-openai==1.0.1 langchain-chroma==1.0.0 langchain-community==0.4 langchain-core==1.0.0 langchain-text-splitters==1.0.0 langchain-huggingface==1.0.0 langchain-classic==1.0.0 chromadb==1.2.1 sentence-transformers==5.1.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
   "id": "802137aa-8a74-45e0-a487-d1974927d7ca",
   "metadata": {},
   "outputs": [],
@@ -68,7 +81,10 @@
    "from langchain_chroma import Chroma\n",
    "from langchain_huggingface import HuggingFaceEmbeddings\n",
    "from langchain_core.callbacks import StdOutCallbackHandler\n",
-    "from langchain_community.document_loaders import DirectoryLoader, TextLoader\n"
+    "from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
+    "from langchain_classic.memory import ConversationBufferMemory\n",
+    "from langchain_classic.chains import ConversationalRetrievalChain\n",
+    "\n"
   ]
  },
  {
@@ -99,7 +115,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
   "id": "730711a9-6ffe-4eee-8f48-d6cfb7314905",
   "metadata": {},
   "outputs": [],
@@ -126,18 +142,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
   "id": "7310c9c8-03c1-4efc-a104-5e89aec6db1a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Created a chunk of size 1088, which is longer than the specified 1000\n"
-     ]
-    }
-   ],
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
   "source": [
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
    "chunks = text_splitter.split_documents(documents)"
@@ -145,28 +155,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
   "id": "cd06e02f-6d9b-44cc-a43d-e1faa8acc7bb",
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "123"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
   "source": [
    "len(chunks)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 11,
   "id": "2c54b4b6-06da-463d-bee7-4dd456c2b887",
   "metadata": {},
   "outputs": [
@@ -174,7 +173,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Document types found: company, employees, contracts, products\n"
+      "Document types found: employees, products, contracts, company\n"
     ]
    }
   ],
@@ -208,13 +207,15 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "78998399-ac17-4e28-b15f-0b5f51e6ee23",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
    "# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk\n",
    "# Chroma is a popular open source Vector Database based on SQLite\n",
    "\n",
-    "# embeddings = OpenAIEmbeddings()\n",
+    "embeddings = OpenAIEmbeddings()\n",
    "\n",
    "# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers\n",
    "# Then replace embeddings = OpenAIEmbeddings()\n",
@@ -222,7 +223,8 @@
    "# from langchain.embeddings import HuggingFaceEmbeddings\n",
    "# embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\", show_progress=False)  # you can set this False to hide the download bar\n",
    "\n",
-    "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
+    "# embeddings = HuggingFaceEmbeddings(\n",
+    "#         model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
    "                                   \n",
    "# Delete if already exists\n",
    "\n",
@@ -239,7 +241,9 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "057868f6-51a6-4087-94d1-380145821550",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
    "# Get one vector and find how many dimensions it has\n",
@@ -262,12 +266,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
   "id": "b98adf5e-d464-4bd2-9bdf-bc5b6770263b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prework\n",
+    "import numpy as np\n",
    "\n",
    "result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
    "vectors = np.array(result['embeddings'])\n",
@@ -287,6 +292,9 @@
    "# Reduce the dimensionality of the vectors to 2D using t-SNE\n",
    "# (t-distributed stochastic neighbor embedding)\n",
    "\n",
+    "from sklearn.manifold import TSNE\n",
+    "import plotly.graph_objects as go\n",
+    "\n",
    "tsne = TSNE(n_components=2, random_state=42)\n",
    "reduced_vectors = tsne.fit_transform(vectors)\n",
    "\n",
@@ -315,7 +323,9 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "e1418e88-acd5-460a-bf2b-4e6efc88e3dd",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
    "# Let's try 3D!\n",
@@ -396,7 +406,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
   "id": "129c7d1e-0094-4479-9459-f9360b95f244",
   "metadata": {},
   "outputs": [],
@@ -418,7 +428,9 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "968e7bf2-e862-4679-a11f-6c1efb6ec8ca",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
    "query = \"Can you describe Insurellm in a few sentences\"\n",
@@ -428,7 +440,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
   "id": "e6eb99fb-33ec-4025-ab92-b634ede03647",
   "metadata": {},
   "outputs": [],
@@ -452,7 +464,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
   "id": "c3536590-85c7-4155-bd87-ae78a1467670",
   "metadata": {},
   "outputs": [],
@@ -468,7 +480,9 @@
   "cell_type": "code",
   "execution_count": null,
   "id": "b252d8c1-61a8-406d-b57a-8f708a62b014",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [],
   "source": [
    "# And in Gradio:\n",