Some updates for the latest Chroma version
This commit is contained in:
@@ -188,7 +188,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -358,7 +358,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "993a2a24-1a58-42be-8034-6d116fb8d786",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -51,7 +51,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "2359ccc0-dbf2-4b1e-9473-e472b32f548b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -66,10 +66,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "645167e6-cf0d-42d2-949f-1089a25a2841",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"\n",
|
||||
@@ -93,7 +101,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "688bd995-ec3e-43cd-8179-7fe14b275877",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -106,10 +114,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "2817eaf5-4302-4a18-9148-d1062e3b3dbb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'How much does this cost to the nearest dollar?\\n\\nDelphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7\\n\\nPrice is $227.00'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"train[0].prompt"
|
||||
]
|
||||
@@ -129,12 +148,13 @@
|
||||
"\n",
|
||||
"Special note: if Chroma crashes and you're a Windows user, you should try rolling back to an earlier version of the Chroma library with: \n",
|
||||
"`!pip install chromadb==0.5.0` \n",
|
||||
"With many thanks to student Kelly Z. for finding this out and pointing to the GitHub issue [here](https://github.com/chroma-core/chroma/issues/2513). "
|
||||
"With many thanks to student Kelly Z. for finding this out and pointing to the GitHub issue [here](https://github.com/chroma-core/chroma/issues/2513). \n",
|
||||
"But try first without reverting Chroma."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "f4aab95e-d719-4476-b6e7-e248120df25a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -144,14 +164,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 19,
|
||||
"id": "5f95dafd-ab80-464e-ba8a-dec7a2424780",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deleted existing collection: products\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Check if the collection exists and delete it if it does\n",
|
||||
"collection_name = \"products\"\n",
|
||||
"existing_collection_names = [collection.name for collection in client.list_collections()]\n",
|
||||
"\n",
|
||||
"# For old versions of Chroma, use this line instead of the subsequent one\n",
|
||||
"# existing_collection_names = [collection.name for collection in client.list_collections()]\n",
|
||||
"existing_collection_names = client.list_collections()\n",
|
||||
"\n",
|
||||
"if collection_name in existing_collection_names:\n",
|
||||
" client.delete_collection(collection_name)\n",
|
||||
" print(f\"Deleted existing collection: {collection_name}\")\n",
|
||||
@@ -179,7 +211,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "a87db200-d19d-44bf-acbd-15c45c70f5c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -189,7 +221,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "9b23a025-4c35-4d3a-96ad-b956cad37b0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -201,17 +233,124 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"id": "8adde63f-e732-4f7c-bba9-f8b2a469f14e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([-9.46715921e-02, 4.27619480e-02, 5.51620498e-02, -5.10962738e-04,\n",
|
||||
" 1.16203260e-02, -6.80130497e-02, 2.76405811e-02, 6.06974475e-02,\n",
|
||||
" 2.88530309e-02, -1.74128171e-02, -4.94346656e-02, 2.30993368e-02,\n",
|
||||
" -1.28614372e-02, -4.31402922e-02, 2.17510257e-02, 4.26548645e-02,\n",
|
||||
" 5.10499887e-02, -7.79727027e-02, -1.23247243e-01, 3.67455557e-02,\n",
|
||||
" 4.54110606e-03, 9.47937742e-02, -5.53098507e-02, 1.70641206e-02,\n",
|
||||
" -2.92873308e-02, -4.47124951e-02, 2.06784457e-02, 6.39320165e-02,\n",
|
||||
" 2.27427781e-02, 4.87789772e-02, -2.33503035e-03, 4.72859442e-02,\n",
|
||||
" -2.86258962e-02, 2.30624825e-02, 2.45130397e-02, 3.95681970e-02,\n",
|
||||
" -4.33176197e-02, -1.02316625e-01, 2.79874774e-03, 2.39304882e-02,\n",
|
||||
" 1.61556154e-02, -8.99078418e-03, 2.07255688e-02, 6.40123338e-02,\n",
|
||||
" 6.89179525e-02, -6.98360875e-02, 2.89764395e-03, -8.10988992e-02,\n",
|
||||
" 1.71123203e-02, 2.50653620e-03, -1.06529057e-01, -4.87733148e-02,\n",
|
||||
" -1.67762041e-02, -2.28662305e-02, 1.14816584e-01, 4.87413220e-02,\n",
|
||||
" -1.64962150e-02, -6.90832064e-02, 1.13612078e-01, -7.18485564e-02,\n",
|
||||
" -9.01571065e-02, 3.91712456e-06, -8.66769403e-02, -4.05916385e-02,\n",
|
||||
" 3.71317938e-02, -1.77618619e-02, -5.57464026e-02, -4.57097329e-02,\n",
|
||||
" -5.43141440e-02, -4.00611758e-02, -4.60227989e-02, 2.82194205e-02,\n",
|
||||
" -2.33606398e-02, 1.44406883e-02, -1.52777461e-02, -4.34328429e-02,\n",
|
||||
" 6.81274384e-02, 8.21894556e-02, 7.83890672e-03, -2.85973065e-02,\n",
|
||||
" 6.14309646e-02, -4.92684618e-02, 3.27055180e-03, -2.72683166e-02,\n",
|
||||
" -4.59346883e-02, -2.60167848e-02, 5.70650175e-02, -5.86463953e-04,\n",
|
||||
" -4.37076613e-02, -2.47807417e-04, -8.73549953e-02, 3.48776393e-02,\n",
|
||||
" 1.39327236e-02, -1.60043724e-02, 2.86958776e-02, -9.48595777e-02,\n",
|
||||
" 9.40612778e-02, 5.92685342e-02, -8.65014344e-02, 1.45011380e-01,\n",
|
||||
" 2.35388409e-02, 3.43324952e-02, 2.51516962e-04, 7.15541244e-02,\n",
|
||||
" -3.12182512e-02, 3.86665650e-02, -2.47745048e-02, 6.52674213e-02,\n",
|
||||
" -8.28817189e-02, -2.80247182e-02, 9.34544671e-03, -7.85543211e-03,\n",
|
||||
" 5.30728251e-02, 2.96895411e-02, 3.27329561e-02, 4.94737104e-02,\n",
|
||||
" 2.52208579e-02, 4.67068404e-02, 5.03403395e-02, -7.23745152e-02,\n",
|
||||
" 2.54435297e-02, -3.67216468e-02, 1.27570340e-02, 1.46063734e-02,\n",
|
||||
" 2.11492609e-02, -5.56909367e-02, -9.18510836e-03, -5.63397912e-34,\n",
|
||||
" 9.58769619e-02, 5.94578013e-02, 5.11445254e-02, 3.36360112e-02,\n",
|
||||
" -1.34414928e-02, -2.77202837e-02, -3.48436125e-02, 1.80352535e-02,\n",
|
||||
" -2.53210980e-02, 6.73587900e-03, 5.47841080e-02, -3.60574126e-02,\n",
|
||||
" -5.20869941e-02, -2.90345643e-02, 4.38962830e-03, 6.50023222e-02,\n",
|
||||
" 3.07485200e-02, 2.00220738e-02, 1.73043029e-03, 2.96725915e-03,\n",
|
||||
" 3.40953190e-03, -6.78145364e-02, 3.41304727e-02, 8.37869197e-03,\n",
|
||||
" 5.39904200e-02, 2.70389449e-02, 7.84119442e-02, -1.30136222e-01,\n",
|
||||
" 4.84649912e-02, 5.14179170e-02, -7.94680975e-03, 5.57883596e-03,\n",
|
||||
" -5.31026050e-02, 3.81299518e-02, -3.05512808e-02, -7.69778788e-02,\n",
|
||||
" 1.20531386e-02, -4.08992954e-02, -8.69358853e-02, 6.38056174e-02,\n",
|
||||
" 1.68674774e-02, 1.68734661e-03, 6.28894269e-02, -1.67711563e-02,\n",
|
||||
" 2.15586051e-02, 7.10083405e-04, 2.81031127e-03, -8.89794994e-03,\n",
|
||||
" -1.80887729e-02, -2.16216948e-02, -5.59149943e-02, 1.78774409e-02,\n",
|
||||
" -9.27092806e-02, 7.27912923e-03, -1.27753615e-01, -4.86937575e-02,\n",
|
||||
" 1.45872515e-02, -1.62751433e-02, 6.75622374e-02, 3.87702174e-02,\n",
|
||||
" 7.23295361e-02, 9.14991871e-02, -9.65291932e-02, 4.84791324e-02,\n",
|
||||
" -1.06274165e-01, -1.05042597e-02, 8.90350714e-02, -8.07525739e-02,\n",
|
||||
" 7.87081569e-02, -2.04917882e-02, -5.55080660e-02, -3.31532657e-02,\n",
|
||||
" -2.14429274e-02, 4.94700260e-02, -7.05119222e-02, 6.63999170e-02,\n",
|
||||
" 7.39671215e-02, -2.70118006e-03, 1.62262432e-02, -3.98229137e-02,\n",
|
||||
" 5.94092607e-02, -7.14372285e-03, -3.33479457e-02, 2.30419375e-02,\n",
|
||||
" 1.87185761e-02, -6.15724660e-02, -8.55441322e-04, -1.05786659e-01,\n",
|
||||
" -8.43619034e-02, -3.92993316e-02, -3.16447318e-02, 6.60644025e-02,\n",
|
||||
" 9.41815823e-02, -8.35982785e-02, 9.50878393e-03, 1.25504937e-34,\n",
|
||||
" 6.38198331e-02, 2.11371221e-02, -1.65899675e-02, 1.88641250e-02,\n",
|
||||
" -5.57018854e-02, 1.82811334e-03, -1.37586696e-02, 8.16279203e-02,\n",
|
||||
" -9.13297161e-02, 7.06856027e-02, 6.79991618e-02, -5.44536524e-02,\n",
|
||||
" 3.80394608e-02, 3.80505901e-03, 1.03689805e-01, 7.32792250e-04,\n",
|
||||
" 2.95661930e-02, 4.19423953e-02, -1.20444328e-01, 1.24932425e-02,\n",
|
||||
" -5.53506613e-02, 1.75228491e-02, -2.28164811e-02, -5.79299554e-02,\n",
|
||||
" 9.42929648e-03, -5.42278960e-03, -3.94944148e-03, 2.82348841e-02,\n",
|
||||
" -1.28066897e-01, -1.31305009e-02, 7.42957145e-02, -1.74529813e-02,\n",
|
||||
" -9.72758904e-02, 8.25622585e-03, 2.06900928e-02, -5.29770693e-03,\n",
|
||||
" -1.37696080e-02, -3.50973643e-02, 1.74977854e-02, -1.76232997e-02,\n",
|
||||
" -6.50825426e-02, -3.84675786e-02, -8.76396820e-02, 3.21291834e-02,\n",
|
||||
" 2.55022198e-03, -2.09378973e-02, 5.55310138e-02, 2.57095750e-02,\n",
|
||||
" -2.94735264e-02, 1.25047946e-02, -6.83466196e-02, -8.00624415e-02,\n",
|
||||
" -1.46906544e-02, 1.03744324e-02, -8.51863101e-02, -1.10539049e-02,\n",
|
||||
" 2.14596409e-02, 4.08609174e-02, 3.31646428e-02, -2.76757460e-02,\n",
|
||||
" -2.01877337e-02, 8.98879580e-03, 3.92048508e-02, 1.15103319e-01,\n",
|
||||
" 5.50440997e-02, 2.72754990e-02, -1.09526694e-01, -1.72622949e-02,\n",
|
||||
" 1.33438502e-02, -1.73702314e-02, -5.04373619e-03, -2.00292896e-02,\n",
|
||||
" 1.16672359e-01, -1.84322968e-02, 3.70628126e-02, 1.60885658e-02,\n",
|
||||
" 3.48830372e-02, 5.50574400e-02, -6.60797628e-03, 7.06828609e-02,\n",
|
||||
" 4.07849252e-02, -1.43314507e-02, -2.85441079e-03, 2.74251588e-02,\n",
|
||||
" -4.26768996e-02, 1.26583334e-02, 3.34343277e-02, 1.62644926e-02,\n",
|
||||
" 1.19263111e-02, -2.92118900e-02, 2.73978021e-02, 3.44304889e-02,\n",
|
||||
" 2.52832547e-02, 3.07514369e-02, 3.22557390e-02, -1.74628472e-08,\n",
|
||||
" -1.52690830e-02, 5.37678460e-03, 1.41246513e-01, 5.08366451e-02,\n",
|
||||
" 5.32256104e-02, 9.67938006e-02, 4.33674715e-02, -6.48309989e-03,\n",
|
||||
" 1.58604085e-02, 4.05631103e-02, 6.94985166e-02, 6.04905337e-02,\n",
|
||||
" -6.26189336e-02, -3.96144278e-02, 1.10648885e-01, 1.67735182e-02,\n",
|
||||
" -7.68693630e-03, 2.59615108e-02, -5.28793186e-02, -2.22318210e-02,\n",
|
||||
" 1.74595844e-02, 4.75339778e-02, 3.27674635e-02, -4.59685028e-02,\n",
|
||||
" 2.01770533e-02, -1.60875805e-02, -1.58613976e-02, -1.66658163e-02,\n",
|
||||
" -3.05246692e-02, -3.87907438e-02, -1.27654579e-02, 6.57610297e-02,\n",
|
||||
" -2.22502891e-02, -9.44992620e-03, 2.32080184e-02, 2.66038440e-02,\n",
|
||||
" 2.14203820e-02, -7.54579064e-03, 8.84752199e-02, -9.43513960e-02,\n",
|
||||
" -5.74870482e-02, -7.77097791e-02, 1.95792271e-03, -1.50347762e-02,\n",
|
||||
" -8.08496401e-03, 1.88217536e-02, 8.42519756e-03, -3.78591903e-02,\n",
|
||||
" 1.24534657e-02, -7.94995651e-02, -2.15790682e-02, 1.20276492e-02,\n",
|
||||
" 1.74870938e-02, 8.74478668e-02, 6.64091483e-02, 3.13736796e-02,\n",
|
||||
" -1.00629032e-02, 2.07700878e-02, -5.20163439e-02, -8.91334843e-03,\n",
|
||||
" 1.48542315e-01, -2.51267431e-03, 9.93156135e-02, 2.34929714e-02],\n",
|
||||
" dtype=float32)"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"id": "38de1bf8-c9b5-45b4-9f4b-86af93b3f80d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -223,26 +362,66 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 13,
|
||||
"id": "8c1205bd-4692-44ef-8ea4-69f255354537",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Delphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"description(train[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16b4ef1f-c696-4a01-b011-00fbccbc1a56",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Now we populate our RAG datastore\n",
|
||||
"\n",
|
||||
"The next cell populates Chroma with all 400,000 items.\n",
|
||||
"\n",
|
||||
"Feel free to reduce the number of documents if this takes too long! You can change to: \n",
|
||||
"`NUMBER_OF_DOCUMENTS = 20000` \n",
|
||||
"And that's plenty for a perfectly good RAG pipeline.\n",
|
||||
"\n",
|
||||
"Just note that if you interrupt the cell below while it's running, you might need to clear out the Chroma datastore (by rerunning the earlier cell that deletes the collection) before you run it again. Otherwise it will complain that there are existing documents with the same ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 20,
|
||||
"id": "8c79e2fe-1f50-4ebf-9a93-34f3088f2996",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|███████████████████████████████████████████| 20/20 [01:01<00:00, 3.05s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in tqdm(range(0, len(train), 1000)):\n",
|
||||
"NUMBER_OF_DOCUMENTS = len(train)\n",
|
||||
"\n",
|
||||
"# Uncomment if you'd rather not wait for the full 400,000\n",
|
||||
"# NUMBER_OF_DOCUMENTS = 20000\n",
|
||||
"\n",
|
||||
"for i in tqdm(range(0, NUMBER_OF_DOCUMENTS, 1000)):\n",
|
||||
" documents = [description(item) for item in train[i: i+1000]]\n",
|
||||
" vectors = model.encode(documents).astype(float).tolist()\n",
|
||||
" metadatas = [{\"category\": item.category, \"price\": item.price} for item in train[i: i+1000]]\n",
|
||||
" ids = [f\"doc_{j}\" for j in range(i, i+1000)]\n",
|
||||
" ids = [f\"doc_{j}\" for j in range(i, i+len(documents))]\n",
|
||||
" collection.add(\n",
|
||||
" ids=ids,\n",
|
||||
" documents=documents,\n",
|
||||
@@ -250,6 +429,14 @@
|
||||
" metadatas=metadatas\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f04f1b20-05ed-461d-b728-d7729125502a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -268,7 +455,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user