Add Week 8 submission for muhammad_qasim_sheikh
This commit is contained in:
@@ -0,0 +1,538 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "mzYB4XYQeWRQ",
|
||||||
|
"metadata": {
|
||||||
|
"id": "mzYB4XYQeWRQ"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Colab setup: install the pip-installable packages that the import cell below relies on.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q tqdm huggingface_hub numpy sentence-transformers datasets chromadb catboost peft torch bitsandbytes openai transformers scikit-learn pandas joblib requests"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b3caecd1-8712-4acd-80b5-e8059c16f43f",
|
||||||
|
"metadata": {
|
||||||
|
"id": "b3caecd1-8712-4acd-80b5-e8059c16f43f"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import re\n",
|
||||||
|
"import zipfile\n",
|
||||||
|
"import chromadb\n",
|
||||||
|
"import joblib\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import requests\n",
|
||||||
|
"import torch\n",
|
||||||
|
"from datasets import load_dataset\n",
|
||||||
|
"from google.colab import userdata\n",
|
||||||
|
"from huggingface_hub import HfApi, hf_hub_download, login\n",
|
||||||
|
"from openai import OpenAI\n",
|
||||||
|
"from peft import PeftModel\n",
|
||||||
|
"from sentence_transformers import SentenceTransformer\n",
|
||||||
|
"from sklearn.linear_model import LinearRegression\n",
|
||||||
|
"from sklearn.metrics import r2_score, mean_squared_error\n",
|
||||||
|
"from tqdm import tqdm\n",
|
||||||
|
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
|
||||||
|
"from catboost import CatBoostRegressor"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "05d9523f-b6c9-4132-bd2b-6712772b3cd2",
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "05d9523f-b6c9-4132-bd2b-6712772b3cd2",
|
||||||
|
"outputId": "f6bb70c7-58f8-4e3c-a592-83cfb4f395a7"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Mounted at /content/drive\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"# Mount Google Drive: later cells read the Chroma store and the CatBoost pickle from /content/drive/MyDrive.\n",
"from google.colab import drive\n",
"drive.mount(\"/content/drive\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "z9735RD_TUHw",
|
||||||
|
"metadata": {
|
||||||
|
"id": "z9735RD_TUHw"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Both secrets are read through google.colab.userdata, so no key is written into the notebook.\n",
"openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n",
"hf_token = userdata.get(\"HF_TOKEN\")\n",
"\n",
"# OpenAI client used by gpt4o_price for the chat-completion call.\n",
"openai = OpenAI(api_key=openai_api_key)\n",
"\n",
"# Log in to the Hugging Face Hub with the token read above.\n",
"login(token=hf_token, add_to_git_credential=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "DtswsfBQxxJF",
|
||||||
|
"metadata": {
|
||||||
|
"id": "DtswsfBQxxJF"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Configuration\n",
"# Hugging Face account name; a later cell builds the dataset repo id from it as f\"{HF_USER}/pricer-data\".\n",
"HF_USER = \"qshaikh\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0eKakxSFTVcA",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"id": "0eKakxSFTVcA"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Load the pricer dataset and keep its \"test\" split; every later cell draws its items from `test`.\n",
"DATASET_NAME = f\"{HF_USER}/pricer-data\"\n",
"dataset = load_dataset(DATASET_NAME)\n",
"test = dataset[\"test\"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cWqvs8JRTggE",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"id": "cWqvs8JRTggE"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def description(item):\n",
"    \"\"\"Return the product text of a dataset item, prefixed with ``passage: ``.\n",
"\n",
"    The question header is removed from ``item[\"text\"]`` and the text is cut at\n",
"    the first price tag, so only the product description remains.\n",
"    \"\"\"\n",
"    question = \"How much does this cost to the nearest dollar?\\n\\n\"\n",
"    price_tag = \"\\n\\nPrice is $\"\n",
"    body = item[\"text\"].replace(question, \"\")\n",
"    # partition() keeps everything before the first price tag, like split(...)[0].\n",
"    product_text, _, _ = body.partition(price_tag)\n",
"    return f\"passage: {product_text}\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "pjPBEgXqmHOA",
|
||||||
|
"metadata": {
|
||||||
|
"id": "pjPBEgXqmHOA"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"CHROMA_PATH = \"/content/drive/MyDrive/chroma\"\n",
"COLLECTION_NAME = \"price_items\"\n",
"\n",
"print(f\"Attempting to load ChromaDB from: {CHROMA_PATH}\")\n",
"\n",
"client = chromadb.PersistentClient(path=CHROMA_PATH)\n",
"# get_or_create_collection does not fail on a missing collection: it returns a new,\n",
"# empty one instead, so a successful load has to be confirmed by counting the items.\n",
"collection = client.get_or_create_collection(name=COLLECTION_NAME)\n",
"\n",
"collection_size = collection.count()\n",
"if collection_size == 0:\n",
"    print(\n",
"        f\"WARNING: ChromaDB collection '{COLLECTION_NAME}' is empty - check CHROMA_PATH; \"\n",
"        \"gpt4o_price will retrieve no similar products.\"\n",
"    )\n",
"else:\n",
"    print(f\"Successfully loaded ChromaDB collection '{COLLECTION_NAME}'.\")\n",
"    print(f\"Collection holds {collection_size} items.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8fi1BS71XCv1",
|
||||||
|
"metadata": {
|
||||||
|
"id": "8fi1BS71XCv1"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Encode on the GPU when one is available; otherwise fall back to the CPU instead of failing.\n",
"embedding_device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"embedding_model = SentenceTransformer(\"intfloat/e5-small-v2\", device=embedding_device)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "zmwIbufXUzMo",
|
||||||
|
"metadata": {
|
||||||
|
"id": "zmwIbufXUzMo"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Base checkpoint and the PEFT adapter applied on top of it; REVISION is passed as the\n",
"# adapter revision when the adapter is loaded below.\n",
"BASE_MODEL = \"meta-llama/Llama-3.1-8B\"\n",
"FINETUNED_MODEL = \"ed-donner/pricer-2024-09-13_13.04.39\"\n",
"REVISION = \"e8d637df551603dc86cd7a1598a8f44af4d7ae36\"\n",
"\n",
"# 4-bit NF4 weights with double quantization; the compute dtype is bfloat16.\n",
"quant_config = BitsAndBytesConfig(\n",
"    load_in_4bit=True,\n",
"    bnb_4bit_quant_type=\"nf4\",\n",
"    bnb_4bit_use_double_quant=True,\n",
"    bnb_4bit_compute_dtype=torch.bfloat16,\n",
")\n",
"\n",
"# The tokenizer reuses its EOS token for padding and pads on the right.\n",
"# NOTE(review): trust_remote_code=True allows code from the model repo to run - confirm it is needed.\n",
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"tokenizer.padding_side = \"right\"\n",
"\n",
"base_model = AutoModelForCausalLM.from_pretrained(\n",
"    BASE_MODEL,\n",
"    quantization_config=quant_config,\n",
"    device_map=\"auto\",\n",
")\n",
"fine_tuned_model = PeftModel.from_pretrained(\n",
"    base_model,\n",
"    FINETUNED_MODEL,\n",
"    revision=REVISION,\n",
")\n",
"\n",
"# Give generate() the same pad token id that the tokenizer uses.\n",
"fine_tuned_model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
"\n",
"print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0IHiJNU7a4XC",
|
||||||
|
"metadata": {
|
||||||
|
"id": "0IHiJNU7a4XC"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Trained CatBoost model, loaded from Google Drive.\n",
"# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load a file you trust.\n",
"catboost_model_path = \"/content/drive/MyDrive/catboost_model.pkl\"\n",
"catboost_model = joblib.load(catboost_model_path)\n",
"print(f\"Successfully loaded CatBoost model from {catboost_model_path}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"id": "LgGmUKJxayZ6",
|
||||||
|
"metadata": {
|
||||||
|
"id": "LgGmUKJxayZ6"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def extract_tagged_price(output: str):\n",
"    \"\"\"Parse the price that follows the first ``Price is $`` tag in ``output``.\n",
"\n",
"    Only the text between the first tag and the next tag (or the end of the\n",
"    text) is searched, after commas are removed. Returns the first number found\n",
"    there as a float, or 0.0 when the tag is missing, no number is found, or any\n",
"    other error occurs.\n",
"    \"\"\"\n",
"    number_pattern = r\"[-+]?\\d*\\.\\d+|\\d+\"\n",
"    try:\n",
"        # With maxsplit=2, element 1 is still the text between the first tag and the next one.\n",
"        after_tag = output.split(\"Price is $\", 2)[1]\n",
"        found = re.search(number_pattern, after_tag.replace(\",\", \"\"))\n",
"        if found is None:\n",
"            return 0.0\n",
"        return float(found.group())\n",
"    except Exception:\n",
"        return 0.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "ggKf1nSQbAnv",
|
||||||
|
"metadata": {
|
||||||
|
"id": "ggKf1nSQbAnv"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def ft_llama_price(description: str):\n",
"    \"\"\"Estimate a price with the fine-tuned Llama model.\n",
"\n",
"    Args:\n",
"        description: Product text inserted into the pricing prompt.\n",
"\n",
"    Returns:\n",
"        The number parsed from the model's completion as a float, or 0.0 when\n",
"        the completion contains no number.\n",
"    \"\"\"\n",
"    prompt = (\n",
"        f\"How much does this cost to the nearest dollar?\\n\\n{description}\\n\\nPrice is $\"\n",
"    )\n",
"    # Send the inputs to the device the model reports instead of assuming \"cuda\".\n",
"    inputs = tokenizer(prompt, return_tensors=\"pt\").to(fine_tuned_model.device)\n",
"\n",
"    # NOTE(review): decoding settings come from the model's generation config; if it\n",
"    # enables sampling, predictions can differ between runs - confirm.\n",
"    outputs = fine_tuned_model.generate(\n",
"        **inputs, max_new_tokens=5, num_return_sequences=1\n",
"    )\n",
"\n",
"    # Decode only the newly generated tokens. Parsing prompt + completion together\n",
"    # reads the wrong number whenever the description itself contains \"Price is $\".\n",
"    prompt_length = inputs[\"input_ids\"].shape[1]\n",
"    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)\n",
"    return extract_tagged_price(\"Price is $\" + completion)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"id": "_cWyYUd4Ub-K",
|
||||||
|
"metadata": {
|
||||||
|
"id": "_cWyYUd4Ub-K"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def catboost_price(description: str):\n",
"    \"\"\"Predict a price from the embedding of ``description`` with the CatBoost model.\n",
"\n",
"    The prediction is clipped at 0 and rounded to 2 decimal places.\n",
"    \"\"\"\n",
"    embeddings = embedding_model.encode([description], normalize_embeddings=True)\n",
"    raw_prediction = catboost_model.predict([embeddings[0]])[0]\n",
"    non_negative = max(0, raw_prediction)\n",
"    return round(float(non_negative), 2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"id": "3Skod8juXgnN",
|
||||||
|
"metadata": {
|
||||||
|
"id": "3Skod8juXgnN"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def gpt4o_price(item):\n",
"    \"\"\"Estimate an item's price with gpt-4o-mini, using similar products as context.\n",
"\n",
"    The item's description is embedded, 5 results are requested from the Chroma\n",
"    ``collection``, and their documents and prices are placed in the prompt.\n",
"\n",
"    Args:\n",
"        item: Dataset record accepted by ``description``.\n",
"\n",
"    Returns:\n",
"        The first number found in the model's reply as a float, or 0.0 when the\n",
"        reply is empty or contains no number.\n",
"    \"\"\"\n",
"\n",
"    def get_embedding(text):\n",
"        return embedding_model.encode([text], normalize_embeddings=True)\n",
"\n",
"    def find_similars(text):\n",
"        results = collection.query(\n",
"            query_embeddings=get_embedding(text).astype(float).tolist(), n_results=5\n",
"        )\n",
"        # A single query is sent, so index 0 selects its result lists.\n",
"        docs = results[\"documents\"][0]\n",
"        prices = [m[\"price\"] for m in results[\"metadatas\"][0]]\n",
"        return docs, prices\n",
"\n",
"    def format_context(similars, prices):\n",
"        context = (\n",
"            \"To provide some context, here are similar products and their prices:\\n\\n\"\n",
"        )\n",
"        for sim, price in zip(similars, prices):\n",
"            context += f\"Product:\\n{sim}\\nPrice is ${price:.2f}\\n\\n\"\n",
"        return context\n",
"\n",
"    def build_messages(product_text, similars, prices):\n",
"        system_message = (\n",
"            \"You are a pricing expert. \"\n",
"            \"Given a product description and a few similar products with their prices, \"\n",
"            \"estimate the most likely price. \"\n",
"            \"Respond ONLY with a number, no words.\"\n",
"        )\n",
"        context = format_context(similars, prices)\n",
"        user_prompt = (\n",
"            \"Estimate the price for the following product:\\n\\n\"\n",
"            + product_text\n",
"            + \"\\n\\n\"\n",
"            + context\n",
"        )\n",
"        return [\n",
"            {\"role\": \"system\", \"content\": system_message},\n",
"            {\"role\": \"user\", \"content\": user_prompt},\n",
"            {\"role\": \"assistant\", \"content\": \"Price is $\"},\n",
"        ]\n",
"\n",
"    # Build the description once and reuse it for retrieval and for the prompt.\n",
"    product_text = description(item)\n",
"    docs, prices = find_similars(product_text)\n",
"    messages = build_messages(product_text, docs, prices)\n",
"    response = openai.chat.completions.create(\n",
"        model=\"gpt-4o-mini\", messages=messages, seed=42, max_tokens=5\n",
"    )\n",
"    # Treat a missing reply as empty text so the search below cannot fail on None.\n",
"    reply = response.choices[0].message.content or \"\"\n",
"    match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", reply.replace(\"$\", \"\").replace(\",\", \"\"))\n",
"    # re.search returns None when the reply holds no number; fall back to 0.0\n",
"    # instead of raising AttributeError on .group().\n",
"    return float(match.group()) if match else 0.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8XQK5yrk8On4",
|
||||||
|
"metadata": {
|
||||||
|
"id": "8XQK5yrk8On4"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"print(\"Splitting entire dataset...\")\n",
"np.random.seed(42)\n",
"all_indices = list(range(len(test)))\n",
"np.random.shuffle(all_indices)\n",
"\n",
"# Cut the shuffled indices into an 80% part and a 20% part...\n",
"train_split_size = int(0.8 * len(all_indices))\n",
"train_indices, test_indices = (\n",
"    all_indices[:train_split_size],\n",
"    all_indices[train_split_size:],\n",
")\n",
"\n",
"# ...then keep at most 250 and 50 of them respectively.\n",
"train_indices, test_indices = train_indices[:250], test_indices[:50]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "XN7P5fkkXfgP",
|
||||||
|
"metadata": {
|
||||||
|
"id": "XN7P5fkkXfgP"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"ft_llama_preds_train, gpt4omini_preds_train = [], []\n",
"catboost_preds_train, true_prices_train = [], []\n",
"\n",
"# One pass over the training indices: record the true price and each model's estimate.\n",
"for i in tqdm(train_indices):\n",
"    item = test[i]\n",
"    text = description(item)\n",
"    true_prices_train.append(item[\"price\"])\n",
"    # ft_llama_price and catboost_price take the description text; gpt4o_price takes the item.\n",
"    ft_llama_preds_train.append(ft_llama_price(text))\n",
"    gpt4omini_preds_train.append(gpt4o_price(item))\n",
"    catboost_preds_train.append(catboost_price(text))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1_6_atEgHnFR",
|
||||||
|
"metadata": {
|
||||||
|
"id": "1_6_atEgHnFR"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Print the true prices and each model's training-stage predictions, one list per line.\n",
"for label, values in (\n",
"    (\"True Prices:\", true_prices_train),\n",
"    (\"FT-LLaMA Predictions:\", ft_llama_preds_train),\n",
"    (\"GPT-4o-mini Predictions:\", gpt4omini_preds_train),\n",
"    (\"CatBoost Predictions:\", catboost_preds_train),\n",
"):\n",
"    print(label, values)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "tYWMhTrXcA7x",
|
||||||
|
"metadata": {
|
||||||
|
"id": "tYWMhTrXcA7x"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Per-item prediction triples in the order (FT_LLaMA, GPT4oMini, CatBoost).\n",
"train_triples = list(\n",
"    zip(ft_llama_preds_train, gpt4omini_preds_train, catboost_preds_train)\n",
")\n",
"maxes_train = [max(triple) for triple in train_triples]\n",
"means_train = [np.mean(triple) for triple in train_triples]\n",
"\n",
"# NOTE(review): \"Mean\" is an exact linear combination of the three model columns,\n",
"# so the regression coefficients fitted on this frame are not uniquely determined.\n",
"train_features = {\n",
"    \"FT_LLaMA\": ft_llama_preds_train,\n",
"    \"GPT4oMini\": gpt4omini_preds_train,\n",
"    \"CatBoost\": catboost_preds_train,\n",
"    \"Max\": maxes_train,\n",
"    \"Mean\": means_train,\n",
"}\n",
"X_train = pd.DataFrame(train_features)\n",
"\n",
"y_train = pd.Series(true_prices_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "-WsFABEicOyo",
|
||||||
|
"metadata": {
|
||||||
|
"id": "-WsFABEicOyo"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"np.random.seed(42)\n",
"# fit() returns the estimator itself, so construction and fitting can be chained.\n",
"lr = LinearRegression().fit(X_train, y_train)\n",
"\n",
"# Print one coefficient per feature column, then the intercept.\n",
"feature_columns = list(X_train.columns)\n",
"for feature, coef in zip(feature_columns, lr.coef_):\n",
"    print(f\"{feature}: {coef:.2f}\")\n",
"print(f\"Intercept={lr.intercept_:.2f}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "W3F0nNBXlrUJ",
|
||||||
|
"metadata": {
|
||||||
|
"id": "W3F0nNBXlrUJ"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"ft_llama_preds_test = []\n",
"gpt4omini_preds_test = []\n",
"catboost_preds_test = []\n",
"true_prices_test = []\n",
"\n",
"# Report the actual number of held-out items rather than a hard-coded 50:\n",
"# test_indices holds fewer entries when the dataset is small.\n",
"print(f\"Processing TEST data ({len(test_indices)} items)...\")\n",
"for i in tqdm(test_indices):\n",
"    item = test[i]\n",
"    text = description(item)\n",
"    true_prices_test.append(item[\"price\"])\n",
"    ft_llama_preds_test.append(ft_llama_price(text))\n",
"    gpt4omini_preds_test.append(gpt4o_price(item))\n",
"    catboost_preds_test.append(catboost_price(text))\n",
"\n",
"# Derived features: per-item maximum and mean of the three model predictions.\n",
"maxes_test = [\n",
"    max(a, b, c)\n",
"    for a, b, c in zip(ft_llama_preds_test, gpt4omini_preds_test, catboost_preds_test)\n",
"]\n",
"means_test = [\n",
"    np.mean([a, b, c])\n",
"    for a, b, c in zip(ft_llama_preds_test, gpt4omini_preds_test, catboost_preds_test)\n",
"]\n",
"\n",
"# Same column names and order as X_train, which the fitted regression expects.\n",
"X_test = pd.DataFrame(\n",
"    {\n",
"        \"FT_LLaMA\": ft_llama_preds_test,\n",
"        \"GPT4oMini\": gpt4omini_preds_test,\n",
"        \"CatBoost\": catboost_preds_test,\n",
"        \"Max\": maxes_test,\n",
"        \"Mean\": means_test,\n",
"    }\n",
")\n",
"\n",
"y_test = pd.Series(true_prices_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "y25l8rR791wG",
|
||||||
|
"metadata": {
|
||||||
|
"id": "y25l8rR791wG"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"print(\"Evaluating model...\")\n",
"y_pred = lr.predict(X_test)\n",
"r2 = r2_score(y_test, y_pred)\n",
"print(f\"R² score: {r2:.4f}\")\n",
"\n",
"rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
"print(f\"RMSE: {rmse:.2f}\")\n",
"\n",
"# MAPE divides by the true price, so a true price of 0 would turn the whole average\n",
"# into inf/nan. Only items with a non-zero true price enter the average.\n",
"true_values = y_test.to_numpy()\n",
"nonzero_price = true_values != 0\n",
"relative_errors = (true_values[nonzero_price] - y_pred[nonzero_price]) / true_values[nonzero_price]\n",
"mape = np.mean(np.abs(relative_errors)) * 100\n",
"print(f\"MAPE: {mape:.2f}%\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"accelerator": "GPU",
|
||||||
|
"colab": {
|
||||||
|
"gpuType": "T4",
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user