fixed memory method of langchain

This commit is contained in:
Bharat Puri
2025-10-24 12:14:48 +05:30
parent fe122c223d
commit 54d717fbd2

View File

@@ -44,17 +44,30 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "78743444-cae7-4fad-bf66-dcfabbe73335",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!pip install -U -q imapclient langchain langchain-openai langchain-chroma langchain-community langchain-core langchain-text-splitters langchain-huggingface chromadb sentence-transformers"
"# !pip install -U imapclient langchain langchain-openai langchain-chroma langchain-community langchain-core langchain-text-splitters langchain-huggingface chromadb sentence-transformers"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "71924170-e73a-4e98-a34a-c5c0567f39da",
"metadata": {},
"outputs": [],
"source": [
"## Install pinned versions of the LangChain packages so that later releases do not break this notebook\n",
"!pip install -U -q imapclient langchain==1.0.2 langchain-openai==1.0.1 langchain-chroma==1.0.0 langchain-community==0.4 langchain-core==1.0.0 langchain-text-splitters==1.0.0 langchain-huggingface==1.0.0 langchain-classic==1.0.0 chromadb==1.2.1 sentence-transformers==5.1.2"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "802137aa-8a74-45e0-a487-d1974927d7ca",
"metadata": {},
"outputs": [],
@@ -68,7 +81,10 @@
"from langchain_chroma import Chroma\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_core.callbacks import StdOutCallbackHandler\n",
"from langchain_community.document_loaders import DirectoryLoader, TextLoader\n"
"from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
"from langchain_classic.memory import ConversationBufferMemory\n",
"from langchain_classic.chains import ConversationalRetrievalChain\n",
"\n"
]
},
{
@@ -99,7 +115,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 9,
"id": "730711a9-6ffe-4eee-8f48-d6cfb7314905",
"metadata": {},
"outputs": [],
@@ -126,18 +142,12 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "7310c9c8-03c1-4efc-a104-5e89aec6db1a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Created a chunk of size 1088, which is longer than the specified 1000\n"
]
}
],
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"chunks = text_splitter.split_documents(documents)"
@@ -145,28 +155,17 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "cd06e02f-6d9b-44cc-a43d-e1faa8acc7bb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"123"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"len(chunks)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 11,
"id": "2c54b4b6-06da-463d-bee7-4dd456c2b887",
"metadata": {},
"outputs": [
@@ -174,7 +173,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Document types found: company, employees, contracts, products\n"
"Document types found: employees, products, contracts, company\n"
]
}
],
@@ -208,13 +207,15 @@
"cell_type": "code",
"execution_count": null,
"id": "78998399-ac17-4e28-b15f-0b5f51e6ee23",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk\n",
"# Chroma is a popular open-source Vector Database based on SQLite\n",
"\n",
"# embeddings = OpenAIEmbeddings()\n",
"embeddings = OpenAIEmbeddings()\n",
"\n",
"# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers\n",
"# Then replace embeddings = OpenAIEmbeddings()\n",
@@ -222,7 +223,8 @@
"# from langchain.embeddings import HuggingFaceEmbeddings\n",
"# embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\", show_progress=False)  # show_progress=False hides the download bar\n",
"\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
"# embeddings = HuggingFaceEmbeddings(\n",
"# model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
" \n",
"# Delete if already exists\n",
"\n",
@@ -239,7 +241,9 @@
"cell_type": "code",
"execution_count": null,
"id": "057868f6-51a6-4087-94d1-380145821550",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Get one vector and find how many dimensions it has\n",
@@ -262,12 +266,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "b98adf5e-d464-4bd2-9bdf-bc5b6770263b",
"metadata": {},
"outputs": [],
"source": [
"# Prework\n",
"import numpy as np\n",
"\n",
"result = collection.get(include=['embeddings', 'documents', 'metadatas'])\n",
"vectors = np.array(result['embeddings'])\n",
@@ -287,6 +292,9 @@
"# Reduce the dimensionality of the vectors to 2D using t-SNE\n",
"# (t-distributed stochastic neighbor embedding)\n",
"\n",
"from sklearn.manifold import TSNE\n",
"import plotly.graph_objects as go\n",
"\n",
"tsne = TSNE(n_components=2, random_state=42)\n",
"reduced_vectors = tsne.fit_transform(vectors)\n",
"\n",
@@ -315,7 +323,9 @@
"cell_type": "code",
"execution_count": null,
"id": "e1418e88-acd5-460a-bf2b-4e6efc88e3dd",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Let's try 3D!\n",
@@ -396,7 +406,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 35,
"id": "129c7d1e-0094-4479-9459-f9360b95f244",
"metadata": {},
"outputs": [],
@@ -418,7 +428,9 @@
"cell_type": "code",
"execution_count": null,
"id": "968e7bf2-e862-4679-a11f-6c1efb6ec8ca",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"query = \"Can you describe Insurellm in a few sentences\"\n",
@@ -428,7 +440,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 37,
"id": "e6eb99fb-33ec-4025-ab92-b634ede03647",
"metadata": {},
"outputs": [],
@@ -452,7 +464,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 38,
"id": "c3536590-85c7-4155-bd87-ae78a1467670",
"metadata": {},
"outputs": [],
@@ -468,7 +480,9 @@
"cell_type": "code",
"execution_count": null,
"id": "b252d8c1-61a8-406d-b57a-8f708a62b014",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# And in Gradio:\n",