Updated models, added vector tests
This commit is contained in:
@@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "993a2a24-1a58-42be-8034-6d116fb8d786",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -51,7 +51,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "2359ccc0-dbf2-4b1e-9473-e472b32f548b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -66,18 +66,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "645167e6-cf0d-42d2-949f-1089a25a2841",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"\n",
|
||||
@@ -101,7 +93,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "688bd995-ec3e-43cd-8179-7fe14b275877",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -114,21 +106,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "2817eaf5-4302-4a18-9148-d1062e3b3dbb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'How much does this cost to the nearest dollar?\\n\\nDelphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7\\n\\nPrice is $227.00'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train[0].prompt"
|
||||
]
|
||||
@@ -154,7 +135,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "f4aab95e-d719-4476-b6e7-e248120df25a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -164,18 +145,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"id": "5f95dafd-ab80-464e-ba8a-dec7a2424780",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deleted existing collection: products\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check if the collection exists and delete it if it does\n",
|
||||
"collection_name = \"products\"\n",
|
||||
@@ -211,7 +184,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "a87db200-d19d-44bf-acbd-15c45c70f5c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -221,140 +194,61 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "9b23a025-4c35-4d3a-96ad-b956cad37b0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pass in a list of texts, get back a numpy array of vectors\n",
|
||||
"\n",
|
||||
"vector = model.encode([\"Well hi there\"])[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "8adde63f-e732-4f7c-bba9-f8b2a469f14e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([-9.46715921e-02, 4.27619480e-02, 5.51620498e-02, -5.10962738e-04,\n",
|
||||
" 1.16203260e-02, -6.80130497e-02, 2.76405811e-02, 6.06974475e-02,\n",
|
||||
" 2.88530309e-02, -1.74128171e-02, -4.94346656e-02, 2.30993368e-02,\n",
|
||||
" -1.28614372e-02, -4.31402922e-02, 2.17510257e-02, 4.26548645e-02,\n",
|
||||
" 5.10499887e-02, -7.79727027e-02, -1.23247243e-01, 3.67455557e-02,\n",
|
||||
" 4.54110606e-03, 9.47937742e-02, -5.53098507e-02, 1.70641206e-02,\n",
|
||||
" -2.92873308e-02, -4.47124951e-02, 2.06784457e-02, 6.39320165e-02,\n",
|
||||
" 2.27427781e-02, 4.87789772e-02, -2.33503035e-03, 4.72859442e-02,\n",
|
||||
" -2.86258962e-02, 2.30624825e-02, 2.45130397e-02, 3.95681970e-02,\n",
|
||||
" -4.33176197e-02, -1.02316625e-01, 2.79874774e-03, 2.39304882e-02,\n",
|
||||
" 1.61556154e-02, -8.99078418e-03, 2.07255688e-02, 6.40123338e-02,\n",
|
||||
" 6.89179525e-02, -6.98360875e-02, 2.89764395e-03, -8.10988992e-02,\n",
|
||||
" 1.71123203e-02, 2.50653620e-03, -1.06529057e-01, -4.87733148e-02,\n",
|
||||
" -1.67762041e-02, -2.28662305e-02, 1.14816584e-01, 4.87413220e-02,\n",
|
||||
" -1.64962150e-02, -6.90832064e-02, 1.13612078e-01, -7.18485564e-02,\n",
|
||||
" -9.01571065e-02, 3.91712456e-06, -8.66769403e-02, -4.05916385e-02,\n",
|
||||
" 3.71317938e-02, -1.77618619e-02, -5.57464026e-02, -4.57097329e-02,\n",
|
||||
" -5.43141440e-02, -4.00611758e-02, -4.60227989e-02, 2.82194205e-02,\n",
|
||||
" -2.33606398e-02, 1.44406883e-02, -1.52777461e-02, -4.34328429e-02,\n",
|
||||
" 6.81274384e-02, 8.21894556e-02, 7.83890672e-03, -2.85973065e-02,\n",
|
||||
" 6.14309646e-02, -4.92684618e-02, 3.27055180e-03, -2.72683166e-02,\n",
|
||||
" -4.59346883e-02, -2.60167848e-02, 5.70650175e-02, -5.86463953e-04,\n",
|
||||
" -4.37076613e-02, -2.47807417e-04, -8.73549953e-02, 3.48776393e-02,\n",
|
||||
" 1.39327236e-02, -1.60043724e-02, 2.86958776e-02, -9.48595777e-02,\n",
|
||||
" 9.40612778e-02, 5.92685342e-02, -8.65014344e-02, 1.45011380e-01,\n",
|
||||
" 2.35388409e-02, 3.43324952e-02, 2.51516962e-04, 7.15541244e-02,\n",
|
||||
" -3.12182512e-02, 3.86665650e-02, -2.47745048e-02, 6.52674213e-02,\n",
|
||||
" -8.28817189e-02, -2.80247182e-02, 9.34544671e-03, -7.85543211e-03,\n",
|
||||
" 5.30728251e-02, 2.96895411e-02, 3.27329561e-02, 4.94737104e-02,\n",
|
||||
" 2.52208579e-02, 4.67068404e-02, 5.03403395e-02, -7.23745152e-02,\n",
|
||||
" 2.54435297e-02, -3.67216468e-02, 1.27570340e-02, 1.46063734e-02,\n",
|
||||
" 2.11492609e-02, -5.56909367e-02, -9.18510836e-03, -5.63397912e-34,\n",
|
||||
" 9.58769619e-02, 5.94578013e-02, 5.11445254e-02, 3.36360112e-02,\n",
|
||||
" -1.34414928e-02, -2.77202837e-02, -3.48436125e-02, 1.80352535e-02,\n",
|
||||
" -2.53210980e-02, 6.73587900e-03, 5.47841080e-02, -3.60574126e-02,\n",
|
||||
" -5.20869941e-02, -2.90345643e-02, 4.38962830e-03, 6.50023222e-02,\n",
|
||||
" 3.07485200e-02, 2.00220738e-02, 1.73043029e-03, 2.96725915e-03,\n",
|
||||
" 3.40953190e-03, -6.78145364e-02, 3.41304727e-02, 8.37869197e-03,\n",
|
||||
" 5.39904200e-02, 2.70389449e-02, 7.84119442e-02, -1.30136222e-01,\n",
|
||||
" 4.84649912e-02, 5.14179170e-02, -7.94680975e-03, 5.57883596e-03,\n",
|
||||
" -5.31026050e-02, 3.81299518e-02, -3.05512808e-02, -7.69778788e-02,\n",
|
||||
" 1.20531386e-02, -4.08992954e-02, -8.69358853e-02, 6.38056174e-02,\n",
|
||||
" 1.68674774e-02, 1.68734661e-03, 6.28894269e-02, -1.67711563e-02,\n",
|
||||
" 2.15586051e-02, 7.10083405e-04, 2.81031127e-03, -8.89794994e-03,\n",
|
||||
" -1.80887729e-02, -2.16216948e-02, -5.59149943e-02, 1.78774409e-02,\n",
|
||||
" -9.27092806e-02, 7.27912923e-03, -1.27753615e-01, -4.86937575e-02,\n",
|
||||
" 1.45872515e-02, -1.62751433e-02, 6.75622374e-02, 3.87702174e-02,\n",
|
||||
" 7.23295361e-02, 9.14991871e-02, -9.65291932e-02, 4.84791324e-02,\n",
|
||||
" -1.06274165e-01, -1.05042597e-02, 8.90350714e-02, -8.07525739e-02,\n",
|
||||
" 7.87081569e-02, -2.04917882e-02, -5.55080660e-02, -3.31532657e-02,\n",
|
||||
" -2.14429274e-02, 4.94700260e-02, -7.05119222e-02, 6.63999170e-02,\n",
|
||||
" 7.39671215e-02, -2.70118006e-03, 1.62262432e-02, -3.98229137e-02,\n",
|
||||
" 5.94092607e-02, -7.14372285e-03, -3.33479457e-02, 2.30419375e-02,\n",
|
||||
" 1.87185761e-02, -6.15724660e-02, -8.55441322e-04, -1.05786659e-01,\n",
|
||||
" -8.43619034e-02, -3.92993316e-02, -3.16447318e-02, 6.60644025e-02,\n",
|
||||
" 9.41815823e-02, -8.35982785e-02, 9.50878393e-03, 1.25504937e-34,\n",
|
||||
" 6.38198331e-02, 2.11371221e-02, -1.65899675e-02, 1.88641250e-02,\n",
|
||||
" -5.57018854e-02, 1.82811334e-03, -1.37586696e-02, 8.16279203e-02,\n",
|
||||
" -9.13297161e-02, 7.06856027e-02, 6.79991618e-02, -5.44536524e-02,\n",
|
||||
" 3.80394608e-02, 3.80505901e-03, 1.03689805e-01, 7.32792250e-04,\n",
|
||||
" 2.95661930e-02, 4.19423953e-02, -1.20444328e-01, 1.24932425e-02,\n",
|
||||
" -5.53506613e-02, 1.75228491e-02, -2.28164811e-02, -5.79299554e-02,\n",
|
||||
" 9.42929648e-03, -5.42278960e-03, -3.94944148e-03, 2.82348841e-02,\n",
|
||||
" -1.28066897e-01, -1.31305009e-02, 7.42957145e-02, -1.74529813e-02,\n",
|
||||
" -9.72758904e-02, 8.25622585e-03, 2.06900928e-02, -5.29770693e-03,\n",
|
||||
" -1.37696080e-02, -3.50973643e-02, 1.74977854e-02, -1.76232997e-02,\n",
|
||||
" -6.50825426e-02, -3.84675786e-02, -8.76396820e-02, 3.21291834e-02,\n",
|
||||
" 2.55022198e-03, -2.09378973e-02, 5.55310138e-02, 2.57095750e-02,\n",
|
||||
" -2.94735264e-02, 1.25047946e-02, -6.83466196e-02, -8.00624415e-02,\n",
|
||||
" -1.46906544e-02, 1.03744324e-02, -8.51863101e-02, -1.10539049e-02,\n",
|
||||
" 2.14596409e-02, 4.08609174e-02, 3.31646428e-02, -2.76757460e-02,\n",
|
||||
" -2.01877337e-02, 8.98879580e-03, 3.92048508e-02, 1.15103319e-01,\n",
|
||||
" 5.50440997e-02, 2.72754990e-02, -1.09526694e-01, -1.72622949e-02,\n",
|
||||
" 1.33438502e-02, -1.73702314e-02, -5.04373619e-03, -2.00292896e-02,\n",
|
||||
" 1.16672359e-01, -1.84322968e-02, 3.70628126e-02, 1.60885658e-02,\n",
|
||||
" 3.48830372e-02, 5.50574400e-02, -6.60797628e-03, 7.06828609e-02,\n",
|
||||
" 4.07849252e-02, -1.43314507e-02, -2.85441079e-03, 2.74251588e-02,\n",
|
||||
" -4.26768996e-02, 1.26583334e-02, 3.34343277e-02, 1.62644926e-02,\n",
|
||||
" 1.19263111e-02, -2.92118900e-02, 2.73978021e-02, 3.44304889e-02,\n",
|
||||
" 2.52832547e-02, 3.07514369e-02, 3.22557390e-02, -1.74628472e-08,\n",
|
||||
" -1.52690830e-02, 5.37678460e-03, 1.41246513e-01, 5.08366451e-02,\n",
|
||||
" 5.32256104e-02, 9.67938006e-02, 4.33674715e-02, -6.48309989e-03,\n",
|
||||
" 1.58604085e-02, 4.05631103e-02, 6.94985166e-02, 6.04905337e-02,\n",
|
||||
" -6.26189336e-02, -3.96144278e-02, 1.10648885e-01, 1.67735182e-02,\n",
|
||||
" -7.68693630e-03, 2.59615108e-02, -5.28793186e-02, -2.22318210e-02,\n",
|
||||
" 1.74595844e-02, 4.75339778e-02, 3.27674635e-02, -4.59685028e-02,\n",
|
||||
" 2.01770533e-02, -1.60875805e-02, -1.58613976e-02, -1.66658163e-02,\n",
|
||||
" -3.05246692e-02, -3.87907438e-02, -1.27654579e-02, 6.57610297e-02,\n",
|
||||
" -2.22502891e-02, -9.44992620e-03, 2.32080184e-02, 2.66038440e-02,\n",
|
||||
" 2.14203820e-02, -7.54579064e-03, 8.84752199e-02, -9.43513960e-02,\n",
|
||||
" -5.74870482e-02, -7.77097791e-02, 1.95792271e-03, -1.50347762e-02,\n",
|
||||
" -8.08496401e-03, 1.88217536e-02, 8.42519756e-03, -3.78591903e-02,\n",
|
||||
" 1.24534657e-02, -7.94995651e-02, -2.15790682e-02, 1.20276492e-02,\n",
|
||||
" 1.74870938e-02, 8.74478668e-02, 6.64091483e-02, 3.13736796e-02,\n",
|
||||
" -1.00629032e-02, 2.07700878e-02, -5.20163439e-02, -8.91334843e-03,\n",
|
||||
" 1.48542315e-01, -2.51267431e-03, 9.93156135e-02, 2.34929714e-02],\n",
|
||||
" dtype=float32)"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector = model.encode([\"Well hi there\"])[0]\n",
|
||||
"vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"id": "8adde63f-e732-4f7c-bba9-f8b2a469f14e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Quick sidebar - extra to the videos - a function to compare vectors\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"def cosine_similarity(a, b):\n",
|
||||
" return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n",
|
||||
"\n",
|
||||
"def how_similar(text1, text2):\n",
|
||||
" vector1, vector2 = model.encode([text1, text2])\n",
|
||||
" similarity = cosine_similarity(vector1, vector2)\n",
|
||||
" print(f\"Similarity between {text1} and {text2} is {similarity*100:.1f}%\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f9c8d19-6993-42e7-afd6-4c61ffc19419",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And let's see how adding a few words to the context can change things up!\n",
|
||||
"\n",
|
||||
"how_similar(\"Java\", \"C++\")\n",
|
||||
"how_similar(\"Java\", \"mug\")\n",
|
||||
"how_similar(\"Cup of Java\", \"mug\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38de1bf8-c9b5-45b4-9f4b-86af93b3f80d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OK back to the main story - let's make something we can vectorize\n",
|
||||
"\n",
|
||||
"def description(item):\n",
|
||||
" text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
|
||||
" return text.split(\"\\n\\nPrice is $\")[0]"
|
||||
@@ -362,21 +256,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"id": "8c1205bd-4692-44ef-8ea4-69f255354537",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Delphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"description(train[0])"
|
||||
]
|
||||
@@ -399,18 +282,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": null,
|
||||
"id": "8c79e2fe-1f50-4ebf-9a93-34f3088f2996",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|███████████████████████████████████████████| 20/20 [01:01<00:00, 3.05s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"NUMBER_OF_DOCUMENTS = len(train)\n",
|
||||
"\n",
|
||||
|
||||
Reference in New Issue
Block a user