"""
Banking77 Intent Mapping
Maps label numbers (0-76) to intent names
"""

# Position in this list IS the label number, so the order must never change.
# NOTE(review): "Refund_not_showing_up" (capital R) and
# "reverted_card_payment?" (trailing '?') look unusual; presumably they are
# verbatim dataset label names - confirm before normalising them.
INTENT_LABELS = [
    "activate_my_card",
    "age_limit",
    "apple_pay_or_google_pay",
    "atm_support",
    "automatic_top_up",
    "balance_not_updated_after_bank_transfer",
    "balance_not_updated_after_cheque_or_cash_deposit",
    "beneficiary_not_allowed",
    "cancel_transfer",
    "card_about_to_expire",
    "card_acceptance",
    "card_arrival",
    "card_delivery_estimate",
    "card_linking",
    "card_not_working",
    "card_payment_fee_charged",
    "card_payment_not_recognised",
    "card_payment_wrong_exchange_rate",
    "card_swallowed",
    "cash_withdrawal_charge",
    "cash_withdrawal_not_recognised",
    "change_pin",
    "compromised_card",
    "contactless_not_working",
    "country_support",
    "declined_card_payment",
    "declined_cash_withdrawal",
    "declined_transfer",
    "direct_debit_payment_not_recognised",
    "disposable_card_limits",
    "edit_personal_details",
    "exchange_charge",
    "exchange_rate",
    "exchange_via_app",
    "extra_charge_on_statement",
    "failed_transfer",
    "fiat_currency_support",
    "get_disposable_virtual_card",
    "get_physical_card",
    "getting_spare_card",
    "getting_virtual_card",
    "lost_or_stolen_card",
    "lost_or_stolen_phone",
    "order_physical_card",
    "passcode_forgotten",
    "pending_card_payment",
    "pending_cash_withdrawal",
    "pending_top_up",
    "pending_transfer",
    "pin_blocked",
    "receiving_money",
    "Refund_not_showing_up",
    "request_refund",
    "reverted_card_payment?",
    "supported_cards_and_currencies",
    "terminate_account",
    "top_up_by_bank_transfer_charge",
    "top_up_by_card_charge",
    "top_up_by_cash_or_cheque",
    "top_up_failed",
    "top_up_limits",
    "top_up_reverted",
    "topping_up_by_card",
    "transaction_charged_twice",
    "transfer_fee_charged",
    "transfer_into_account",
    "transfer_not_received_by_recipient",
    "transfer_timing",
    "unable_to_verify_identity",
    "verify_my_identity",
    "verify_source_of_funds",
    "verify_top_up",
    "virtual_card_not_working",
    "visa_or_mastercard",
    "why_verify_identity",
    "wrong_amount_of_cash_received",
    "wrong_exchange_rate_for_cash_withdrawal",
]

# Reverse table built once at import so get_label() is a dict lookup rather
# than a linear scan. It mirrors INTENT_LABELS as defined above; the list is
# treated as a constant and is not expected to be mutated at runtime.
_LABEL_BY_INTENT = {intent: label for label, intent in enumerate(INTENT_LABELS)}


def get_intent(label_number):
    """
    Get intent name from label number.

    Args:
        label_number (int): Label from 0 to 76

    Returns:
        str: Intent name

    Raises:
        ValueError: If label_number is outside the valid label range

    Example:
        >>> get_intent(0)
        'activate_my_card'
        >>> get_intent(25)
        'declined_card_payment'
    """
    # Derive the bound from the table so the check and the message can never
    # drift away from the actual number of labels.
    max_label = len(INTENT_LABELS) - 1
    if 0 <= label_number <= max_label:
        return INTENT_LABELS[label_number]
    raise ValueError(f"Label must be between 0 and {max_label}, got {label_number}")


def get_label(intent_name):
    """
    Get label number from intent name.

    Args:
        intent_name (str): Intent name (exact, case-sensitive match)

    Returns:
        int: Label number (0-76)

    Raises:
        ValueError: If intent_name is not one of the known intents

    Example:
        >>> get_label('activate_my_card')
        0
        >>> get_label('declined_card_payment')
        25
    """
    try:
        return _LABEL_BY_INTENT[intent_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which the previous
        # list.index() based lookup also reported as "not found".
        raise ValueError(f"Intent '{intent_name}' not found in labels") from None


# Quick access
def show_all_intents():
    """Display all 77 intents with their labels"""
    for label, intent in enumerate(INTENT_LABELS):
        print(f"{label}\t{intent}")


if __name__ == "__main__":
    # Test the functions
    print("Testing get_intent:")
    print(f"Label 0: {get_intent(0)}")
    print(f"Label 25: {get_intent(25)}")
    print(f"Label 76: {get_intent(76)}")

    print("\nTesting get_label:")
    print(f"'activate_my_card': {get_label('activate_my_card')}")
    print(f"'declined_card_payment': {get_label('declined_card_payment')}")
"""
Classification Tester for Banking Intent Model
Evaluates model accuracy on intent classification
"""

import matplotlib.pyplot as plt
from collections import Counter
from banking_intents import get_intent

# ANSI colour escape sequences used for per-example console output
GREEN = "\033[92m"
RED = "\033[91m"
RESET = "\033[0m"


class ClassifierTester:
    """Test framework for classification models.

    Runs a predictor over the first ``size`` items of ``data``, compares each
    prediction with the intent name obtained from the item's ``label`` via
    ``get_intent`` and reports accuracy plus the most frequent confusions.
    """

    def __init__(self, predictor, data, title=None, size=100):
        """
        Args:
            predictor: Callable that takes one data item and returns the
                predicted intent name
            data: Indexable, sized collection of items with 'text' and
                'label' keys
            title: Display name for the report (default: derived from the
                predictor's name)
            size: Maximum number of examples to evaluate (capped at len(data))
        """
        self.predictor = predictor
        self.data = data
        self.title = title or self._default_title(predictor)
        self.size = min(size, len(data))
        self._reset()

    @staticmethod
    def _default_title(predictor):
        """Build a readable title from the predictor's name."""
        # functools.partial objects and callable instances have no __name__,
        # so fall back to the type name instead of raising AttributeError.
        name = getattr(predictor, "__name__", None) or type(predictor).__name__
        return name.replace("_", " ").title()

    def _reset(self):
        """Clear all per-run results and counters."""
        self.predictions = []
        self.actuals = []
        self.correct = 0
        self.incorrect = 0

    def run_datapoint(self, i):
        """Test a single example and record/print the outcome.

        Args:
            i: Index into ``self.data`` of the example to evaluate
        """
        item = self.data[i]

        # Get prediction and ground truth as intent names
        predicted_intent = self.predictor(item)
        actual_intent = get_intent(item['label'])

        if predicted_intent == actual_intent:
            self.correct += 1
            color, status = GREEN, "✓"
        else:
            self.incorrect += 1
            color, status = RED, "✗"

        self.predictions.append(predicted_intent)
        self.actuals.append(actual_intent)

        # Print result, truncating long queries to keep one line per example
        text = item['text']
        query = text[:60] + "..." if len(text) > 60 else text
        print(f"{color}{status} {i+1}: {query}")
        print(f"   Predicted: {predicted_intent} | Actual: {actual_intent}{RESET}")

    def _error_pairs(self):
        """Return a Counter of (actual, predicted) pairs for every miss."""
        return Counter(
            (actual, pred)
            for pred, actual in zip(self.predictions, self.actuals)
            if pred != actual
        )

    def chart(self):
        """Visualize top confusion pairs"""
        if not self.predictions:
            # Nothing was evaluated, so "perfect accuracy" would be misleading
            print("\nNo predictions to plot.")
            return

        errors = self._error_pairs()
        if not errors:
            print("\n🎉 Perfect accuracy - no confusion to plot!")
            return

        # Plot top 10 confusions
        top_errors = errors.most_common(10)
        labels = [f"{actual} → {pred}" for (actual, pred), _ in top_errors]
        counts = [count for _, count in top_errors]

        plt.figure(figsize=(12, 6))
        plt.barh(labels, counts, color='coral')
        plt.xlabel('Count')
        plt.title('Top 10 Confusion Pairs (Actual → Predicted)')
        plt.tight_layout()
        plt.show()

    def report(self):
        """Print final metrics and chart.

        Returns:
            float: Accuracy in percent over the evaluated examples
                (0.0 when nothing was evaluated)
        """
        # Use the number of examples actually evaluated as the denominator:
        # it avoids ZeroDivisionError on an empty dataset and stays correct
        # when report() is called without a full run().
        tested = len(self.predictions)
        accuracy = (self.correct / tested) * 100 if tested else 0.0

        print("\n" + "="*70)
        print(f"MODEL: {self.title}")
        print(f"TESTED: {tested} examples")
        print(f"CORRECT: {self.correct} ({accuracy:.1f}%)")
        print(f"INCORRECT: {self.incorrect}")
        print("="*70)

        # Show most common errors
        if self.incorrect > 0:
            print("\nMost Common Errors:")
            for (actual, pred), count in self._error_pairs().most_common(5):
                print(f"  {actual} → {pred}: {count} times")

        # Chart
        self.chart()

        return accuracy

    def run(self):
        """Run the complete evaluation.

        Returns:
            float: Accuracy in percent, as returned by report()
        """
        print(f"Testing {self.title} on {self.size} examples...\n")

        # Start from a clean slate so calling run() twice does not
        # double-count results from the previous run.
        self._reset()
        for i in range(self.size):
            self.run_datapoint(i)

        return self.report()

    @classmethod
    def test(cls, function, data, size=100, title=None):
        """Convenience method to test a predictor function.

        Args:
            function: Predictor callable (see __init__)
            data: Dataset to evaluate on
            size: Maximum number of examples to evaluate
            title: Optional display name forwarded to the tester

        Returns:
            float: Accuracy in percent
        """
        return cls(function, data, title=title, size=size).run()
"""
Data cleaning utilities for dataset preparation
"""

from collections import Counter, defaultdict


def clean_dataset(data, min_length=10, max_samples_per_intent=None):
    """
    Clean and prepare dataset for fine-tuning

    Each example's text is whitespace-normalized (trimmed, internal runs of
    whitespace collapsed to a single space); examples whose normalized text
    is shorter than ``min_length`` are dropped.

    Args:
        data: HuggingFace dataset or list of examples, each with 'text' and
            'label' keys
        min_length: Minimum (normalized) text length to keep (default: 10)
        max_samples_per_intent: Max samples per intent for balancing
            (default: None = no limit; 0 is also treated as no limit)

    Returns:
        list: Cleaned examples as {'text', 'label'} dicts. When balancing is
            active the result is grouped by label, in order of each label's
            first appearance.

    Example:
        >>> cleaned = clean_dataset(dataset['train'], min_length=10, max_samples_per_intent=200)
        >>> print(f"Cleaned {len(cleaned)} examples")
    """
    cleaned = []

    for example in data:
        # Normalize first: split() with no argument drops leading/trailing
        # whitespace and collapses internal runs, so the length check below
        # is applied to the text that is actually stored.
        text = ' '.join(example['text'].split())

        # Skip if too short
        if len(text) < min_length:
            continue

        cleaned.append({
            'text': text,
            'label': example['label']
        })

    # Balance classes if max_samples_per_intent is specified
    if max_samples_per_intent:
        by_label = defaultdict(list)
        for item in cleaned:
            by_label[item['label']].append(item)

        # Keep the first N examples of every label
        cleaned = [
            item
            for items in by_label.values()
            for item in items[:max_samples_per_intent]
        ]

    return cleaned


def analyze_distribution(data):
    """
    Analyze label distribution in dataset

    Args:
        data: List of examples with 'label' field

    Returns:
        collections.Counter: Mapping of label -> number of examples
    """
    return Counter(item['label'] for item in data)
--- .../hopeogbons/week6 EXERCISE.ipynb | 855 ++++++++++++++++++ 1 file changed, 855 insertions(+) create mode 100644 week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb diff --git a/week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb b/week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb new file mode 100644 index 0000000..bcbd0f4 --- /dev/null +++ b/week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb @@ -0,0 +1,855 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "776935d0", + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries for data handling, API connections, and model training\n", + "import os\n", + "import re\n", + "import math\n", + "import json\n", + "import random\n", + "from dotenv import load_dotenv\n", + "from huggingface_hub import login\n", + "import matplotlib.pyplot as plt\n", + "from datasets import load_dataset\n", + "import numpy as np\n", + "import pickle\n", + "from collections import Counter\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "04ef96aa", + "metadata": {}, + "outputs": [], + "source": [ + "# Load API keys from .env file\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', '####-####-####-####')\n", + "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', '####-####-####-####')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8458f9e7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n" + ] + } + ], + "source": [ + "# Initialize OpenAI client and login to HuggingFace\n", + "openai = OpenAI()\n", + "\n", + "hf_token = os.environ['HF_TOKEN']\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "markdown", + "id": "0263f64b", 
+ "metadata": {}, + "source": [ + "# Step 1\n", + "\n", + "### Prepare our data for fine-tuning in JSONL (JSON Lines) format and upload to OpenAI" + ] + }, + { + "cell_type": "markdown", + "id": "0302c73d", + "metadata": {}, + "source": [ + "### Load and Cache Dataset\n", + "Download banking77 dataset or load from cache (for slow internet)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a85d7fbd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading from cached pickle files...\n", + "✓ Loaded 10003 train and 3080 test samples from cache\n" + ] + } + ], + "source": [ + "from data_cleaner import clean_dataset\n", + "\n", + "# Check if pickle files exist, otherwise download\n", + "if os.path.exists('train.pkl') and os.path.exists('test.pkl'):\n", + " print(\"Loading from cached pickle files...\")\n", + " with open('train.pkl', 'rb') as f:\n", + " train = pickle.load(f)\n", + " with open('test.pkl', 'rb') as f:\n", + " test = pickle.load(f)\n", + " print(f\"✓ Loaded {len(train)} train and {len(test)} test samples from cache\")\n", + "else:\n", + " print(\"✓ Downloading dataset from HuggingFace...\")\n", + " dataset = load_dataset(\"PolyAI/banking77\")\n", + " \n", + " # Clean the data\n", + " print(\"Cleaning dataset...\")\n", + " train = clean_dataset(dataset['train'], min_length=10, max_samples_per_intent=200)\n", + " test = clean_dataset(dataset['test'], min_length=10)\n", + " \n", + " # Save for next time\n", + " with open('train.pkl', 'wb') as f:\n", + " pickle.dump(train, f)\n", + " with open('test.pkl', 'wb') as f:\n", + " pickle.dump(test, f)\n", + " print(f\"✓ Cleaned and saved {len(train)} train and {len(test)} test samples\")" + ] + }, + { + "cell_type": "markdown", + "id": "df2d9c9d", + "metadata": {}, + "source": [ + "# Step 2\n", + "\n", + "### Create fine-tuning job on OpenAI and monitor training progress" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": 
"9a608e40", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ Created 200 train and 50 validation examples\n" + ] + } + ], + "source": [ + "# Convert to list format for easier handling\n", + "train_list = [{'text': train[i]['text'], 'label': train[i]['label']} for i in range(len(train))]\n", + "\n", + "# Create fine-tuning subsets\n", + "fine_tune_train = train_list[:200] #800\n", + "fine_tune_validation = train_list[200:250] #4,000\n", + "\n", + "print(f\"✓ Created {len(fine_tune_train)} train and {len(fine_tune_validation)} validation examples\")" + ] + }, + { + "cell_type": "markdown", + "id": "e878a4f0", + "metadata": {}, + "source": [ + "### Format Messages for OpenAI\n", + "Create training and inference message formats\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e305e49e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'role': 'system',\n", + " 'content': 'You classify banking customer queries into intents. Reply only with the intent name, no explanation'},\n", + " {'role': 'user', 'content': 'I am still waiting on my card?'},\n", + " {'role': 'assistant', 'content': 'card_arrival'}]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from banking_intents import get_intent\n", + "\n", + "def messages_for_training(item):\n", + " \"\"\"Create messages for fine-tuning - includes the correct answer\"\"\"\n", + " system_message = \"You classify banking customer queries into intents. 
Reply only with the intent name, no explanation\"\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": item['text']},\n", + " {\"role\": \"assistant\", \"content\": get_intent(item['label'])}\n", + " ]\n", + "\n", + "def messages_for_inference(item):\n", + " \"\"\"Create messages for prediction - NO answer (model must predict)\"\"\"\n", + " system_message = \"You classify banking customer queries into intents. Reply only with the intent name, no explanation\"\n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": item['text']}\n", + " ]\n", + "\n", + "# Test training format\n", + "messages_for_training(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c3a27241", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"messages\": [{\"role\": \"system\", \"content\": \"You classify banking customer queries into intents. Reply only with the intent name, no explanation\"}, {\"role\": \"user\", \"content\": \"I am still waiting on my card?\"}, {\"role\": \"assistant\", \"content\": \"card_arrival\"}]}\n", + "{\"messages\": [{\"role\": \"system\", \"content\": \"You classify banking customer queries into intents. Reply only with the intent name, no explanation\"}, {\"role\": \"user\", \"content\": \"What can I do if my card still hasn't arrived after 2 weeks?\"}, {\"role\": \"assistant\", \"content\": \"card_arrival\"}]}\n", + "{\"messages\": [{\"role\": \"system\", \"content\": \"You classify banking customer queries into intents. Reply only with the intent name, no explanation\"}, {\"role\": \"user\", \"content\": \"I have been waiting over a week. 
Is the card still coming?\"}, {\"role\": \"assistant\", \"content\": \"card_arrival\"}]}\n" + ] + } + ], + "source": [ + "def make_jsonl(data, start=0, end=None):\n", + " \"\"\"Convert data to JSONL format for training\"\"\"\n", + " result = \"\"\n", + " end = end or len(data)\n", + " \n", + " for i in range(start, end):\n", + " item = data[i]\n", + " messages = messages_for_training(item) # Use training format\n", + " messages_str = json.dumps(messages)\n", + " result += '{\"messages\": ' + messages_str +'}\\n'\n", + " \n", + " return result.strip()\n", + "\n", + "print(make_jsonl(train, start=0, end=3))" + ] + }, + { + "cell_type": "markdown", + "id": "bb0e75d6", + "metadata": {}, + "source": [ + "### Convert to JSONL and Upload\n", + "Prepare data in OpenAI format and upload to their servers\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dd4affd3", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Write JSONL string to file\n", + "def write_jsonl(data, filename, start=0, end=None):\n", + " with open(filename, \"w\") as f:\n", + " jsonl = make_jsonl(data, start, end)\n", + " f.write(jsonl)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8c5bf74c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0. I am still waiting on my card? → Intent: 11\n", + "1. What can I do if my card still hasn't arrived after 2 weeks? → Intent: 11\n", + "2. I have been waiting over a week. Is the card still coming? → Intent: 11\n" + ] + } + ], + "source": [ + "# Verify data loaded correctly - show first 3 examples\n", + "for i in range(3):\n", + " print(f\"{i}. 
{train[i]['text']} → Intent: {train[i]['label']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8b3bc0a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ Uploaded train: file-U84ceTSvNn833d6aPpWX4i\n", + "✓ Uploaded validation: file-ARVTnFJnHn2HpE9UAr9mr5\n" + ] + } + ], + "source": [ + "def prepare_and_upload(data, filename):\n", + " \"\"\"Write JSONL and upload to OpenAI\"\"\"\n", + " write_jsonl(data, filename)\n", + " with open(filename, \"rb\") as f:\n", + " return openai.files.create(file=f, purpose=\"fine-tune\")\n", + "\n", + "# Use it\n", + "train_file = prepare_and_upload(fine_tune_train, \"fine_tune_train.jsonl\")\n", + "validation_file = prepare_and_upload(fine_tune_validation, \"fine_tune_validation.jsonl\")\n", + "\n", + "print(f\"✓ Uploaded train: {train_file.id}\")\n", + "print(f\"✓ Uploaded validation: {validation_file.id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f6147112", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "FineTuningJob(id='ftjob-FN3B5dQQOhuk4UVOZ5X4CqBU', created_at=1761873619, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-OFfqVJ5fIDV1i5BqCwT86Px6', result_files=[], seed=42, status='validating_files', trained_tokens=None, training_file='file-U84ceTSvNn833d6aPpWX4i', validation_file='file-ARVTnFJnHn2HpE9UAr9mr5', estimated_finish=None, integrations=[], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1))), user_provided_suffix='banking_intent', usage_metrics=None, shared_with_openai=False, eval_id=None)" + ] + }, + 
"execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create fine-tuning job - training happens on OpenAI's servers\n", + "openai.fine_tuning.jobs.create(\n", + " training_file=train_file.id,\n", + " validation_file=validation_file.id,\n", + " model=\"gpt-4o-mini-2024-07-18\",\n", + " seed=42, # For reproducibility\n", + " hyperparameters={\"n_epochs\": 1}, # Training passes\n", + " suffix=\"banking_intent\" # Custom model name\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dd2fe11f", + "metadata": {}, + "source": [ + "### Monitor Training Progress\n", + "Check job status and view training events\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bb98e266", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status: ftjob-FN3B5dQQOhuk4UVOZ5X4CqBU\n", + "Status: succeeded\n" + ] + } + ], + "source": [ + "# List most recent fine-tuning job to check status\n", + "job = openai.fine_tuning.jobs.list(limit=1).data[0]\n", + "print(f\"Status: {job.id}\")\n", + "print(f\"Status: {job.status}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a503b4f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "FineTuningJob(id='ftjob-FN3B5dQQOhuk4UVOZ5X4CqBU', created_at=1761873619, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5', finished_at=1761874273, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-OFfqVJ5fIDV1i5BqCwT86Px6', result_files=['file-N5eUNWYhwaxJt2KSYEjpBU'], seed=42, status='succeeded', trained_tokens=9105, training_file='file-U84ceTSvNn833d6aPpWX4i', validation_file='file-ARVTnFJnHn2HpE9UAr9mr5', estimated_finish=None, integrations=[], metadata=None, method=Method(type='supervised', dpo=None, 
reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1))), user_provided_suffix='banking_intent', usage_metrics=None, shared_with_openai=False, eval_id=None)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get detailed information about the job\n", + "openai.fine_tuning.jobs.retrieve(job.id)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d7008bdd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[FineTuningJobEvent(id='ftevent-nsKcQzrX5kAlH71815ndMGr4', created_at=1761875045, level='info', message='The job has successfully completed', object='fine_tuning.job.event', data={}, type='message'),\n", + " FineTuningJobEvent(id='ftevent-BWM8ClTEEWO3rVf1bn8WdszY', created_at=1761875039, level='info', message='Usage policy evaluations completed, model is now enabled for sampling', object='fine_tuning.job.event', data={}, type='message'),\n", + " FineTuningJobEvent(id='ftevent-yyxA2AU1GvpP5Ii3WbxlDktc', created_at=1761875039, level='info', message='Moderation checks for snapshot ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5 passed.', object='fine_tuning.job.event', data={'blocked': False, 'results': [{'flagged': False, 'category': 'harassment/threatening', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'sexual', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'sexual/minors', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'propaganda', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'hate', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'hate/threatening', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'illicit', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'violence', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'advice', 'enforcement': 'blocking'}, {'flagged': 
False, 'category': 'self-harm/intent', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'self-harm/instructions', 'enforcement': 'non_blocking'}, {'flagged': False, 'category': 'sensitive', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'highly-sensitive', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'biological threats', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'cyber security threats', 'enforcement': 'blocking'}], 'finetuned_model_checkpoint_id': 'ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5'}, type='moderation_checks'),\n", + " FineTuningJobEvent(id='ftevent-yojUh3akovLiyB1gX7NN4UCn', created_at=1761874276, level='info', message='Evaluating model against our usage policies', object='fine_tuning.job.event', data={}, type='message'),\n", + " FineTuningJobEvent(id='ftevent-tGdquF1ECX2kirD3YMFZ68H2', created_at=1761874276, level='info', message='New fine-tuned model created', object='fine_tuning.job.event', data={}, type='message'),\n", + " FineTuningJobEvent(id='ftevent-sMiUeWRPavauz8yO7SJfWA3U', created_at=1761874248, level='info', message='Step 200/200: training loss=0.00, validation loss=0.00, full validation loss=0.00', object='fine_tuning.job.event', data={'step': 200, 'train_loss': 0.00013084411330055445, 'valid_loss': 0.0001373291015625, 'total_steps': 200, 'full_valid_loss': 0.000696044921875, 'train_mean_token_accuracy': 1.0, 'valid_mean_token_accuracy': 1.0, 'full_valid_mean_token_accuracy': 1.0}, type='metrics'),\n", + " FineTuningJobEvent(id='ftevent-C6XLihn75vu22sJBkSbBDZ6L', created_at=1761874239, level='info', message='Step 199/200: training loss=0.00', object='fine_tuning.job.event', data={'step': 199, 'train_loss': 8.850097947288305e-05, 'total_steps': 200, 'train_mean_token_accuracy': 1.0}, type='metrics'),\n", + " FineTuningJobEvent(id='ftevent-LUGsXqTJjgAIw8fQQLjaTewj', created_at=1761874239, level='info', message='Step 198/200: training loss=0.00', 
object='fine_tuning.job.event', data={'step': 198, 'train_loss': 0.00010757446580100805, 'total_steps': 200, 'train_mean_token_accuracy': 1.0}, type='metrics'),\n", + " FineTuningJobEvent(id='ftevent-4peUk2wOGyP7Pe0COgCEMtI1', created_at=1761874239, level='info', message='Step 197/200: training loss=0.00', object='fine_tuning.job.event', data={'step': 197, 'train_loss': 0.00012130737013649195, 'total_steps': 200, 'train_mean_token_accuracy': 1.0}, type='metrics'),\n", + " FineTuningJobEvent(id='ftevent-SyIiUrcSzUzoYBZMAMfgH3zh', created_at=1761874234, level='info', message='Step 196/200: training loss=0.00', object='fine_tuning.job.event', data={'step': 196, 'train_loss': 9.689330909168348e-05, 'total_steps': 200, 'train_mean_token_accuracy': 1.0}, type='metrics')]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# View training events log (last 10 events)\n", + "openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job.id, limit=10).data" + ] + }, + { + "cell_type": "markdown", + "id": "cb837277", + "metadata": {}, + "source": [ + "# Step 3\n", + "\n", + "### Use the fine-tuned model to classify banking queries" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "d710b977", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the fine-tuned model ID (only works after training succeeds)\n", + "fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job.id).fine_tuned_model\n", + "fine_tuned_model_name" + ] + }, + { + "cell_type": "markdown", + "id": "e9518959", + "metadata": {}, + "source": [ + "### Use Fine-Tuned Model\n", + "Classify banking queries with the trained model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "5afef0bb", + "metadata": {}, + "outputs": 
[ + { + "data": { + "text/plain": [ + "'card_arrival'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def gpt_fine_tuned(item):\n", + " \"\"\"Classify banking query using fine-tuned model\"\"\"\n", + " response = openai.chat.completions.create(\n", + " model=fine_tuned_model_name,\n", + " messages=messages_for_inference(item), # Use inference format (no label)\n", + " seed=42,\n", + " max_tokens=20\n", + " )\n", + " intent = response.choices[0].message.content.strip()\n", + " return intent\n", + "\n", + "gpt_fine_tuned(train[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5198c5c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model name: ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5\n", + "\n", + "Job ID: ftjob-FN3B5dQQOhuk4UVOZ5X4CqBU\n", + "Status: succeeded\n", + "Fine-tuned model: ft:gpt-4o-mini-2024-07-18:hope-ogbons:banking-intent:CWYGwKT5\n" + ] + } + ], + "source": [ + "# Verify training completed successfully\n", + "print(\"Model name:\", fine_tuned_model_name)\n", + "print()\n", + "\n", + "print(\"Job ID:\", job.id)\n", + "job_status = openai.fine_tuning.jobs.retrieve(job.id)\n", + "print(\"Status:\", job_status.status)\n", + "print(\"Fine-tuned model:\", job_status.fine_tuned_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "33683c39", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test example: {'text': 'How do I locate my card?', 'label': 11}\n", + "Predicted intent: card_arrival\n" + ] + } + ], + "source": [ + "# Test the fine-tuned model on a single example\n", + "print(\"Test example:\", test[0])\n", + "print(\"Predicted intent:\", gpt_fine_tuned(test[0]))" + ] + }, + { + "cell_type": "markdown", + "id": "216f3164", + "metadata": {}, + "source": [ + "### Evaluate Model Performance\n", + "Test on 100 examples 
and calculate accuracy\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b500717d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ Converted 3080 test examples to list format\n" + ] + } + ], + "source": [ + "# Convert test to list format\n", + "test_list = [{'text': test[i]['text'], 'label': test[i]['label']} for i in range(len(test))]\n", + "\n", + "print(f\"✓ Converted {len(test_list)} test examples to list format\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "f0787061", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing Gpt Fine Tuned on 100 examples...\n", + "\n", + "\u001b[92m✓ 1: How do I locate my card?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 2: I still have not received my new card, I ordered over a week...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 3: I ordered a card but it has not arrived. 
Help please!\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 4: Is there a way to know when my card will arrive?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 5: My card has not arrived yet.\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 6: When will I get my card?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 7: Do you know if there is a tracking number for the new card y...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 8: i have not received my card\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 9: still waiting on that card\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 10: Is it normal to have to wait over a week for my new card?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 11: How do I track my card?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 12: How long does a card delivery take?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 13: I still don't have my card after 2 weeks. What should I do?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 14: still waiting on my new card\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 15: I am still waiting for my card after 1 week. 
Is this ok?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 16: I have been waiting longer than expected for my bank card, c...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 17: I've been waiting longer than expected for my card.\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 18: Why hasn't my card been delivered?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 19: Where is my new card? I have been waiting a week!\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 20: My card still hasn't arrived after 2 weeks. Is it lost?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 21: I did not get my card yet, is it lost?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 22: Status of the card I ordered.\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 23: How long should my new card take to arrive?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 24: I ordered my card 2 weeks ago and it still isn't here? What ...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 25: My card has not arrived yet, where is it?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 26: What is the tracking number for my card that was mailed?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 27: I think something went wrong with my card delivery as I have...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 28: I'm still waiting for delivery of my new card, why is it tak...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 29: I ordered a card a week ago, and it's still not here. 
What d...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 30: i want to track the card you sent\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 31: My card hasn't arrived yet.\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 32: I was expecting my new card and am wondering why I haven't r...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 33: How do I know when my card will arrive?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 34: I'm still waiting on my card to be delivered.\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 35: Does the card you sent have a way to track to it?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 36: I ordered a card and I still haven't received it. It's been ...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 37: I'm starting to think my card is lost because it still hasn'...\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 38: Is there tracking info available?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 39: What is the tracking number for the card you sent?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 40: Where is the tracking number for the card you sent me?\n", + " Predicted: card_arrival | Actual: card_arrival\u001b[0m\n", + "\u001b[92m✓ 41: Why won't my card show up on the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 42: I would like to reactivate my card.\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 43: Where do I link the new card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 44: I have received my card, can you help me put it in 
the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 45: How do I link a card that I already have?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 46: I received my new card, but I don't see it in the app anywhe...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 47: How do I re-add a card to the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 48: How do I add the card to my account?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 49: Can I put my old card back into the system? I just found it/\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 50: I have one of your cards already, how do I link it?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 51: How do I link a new card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 52: Can I link an existing card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 53: How do I link one your card if I have one already?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 54: How do I add a card to the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 55: Can I link my new card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 56: Hello, I found the card I misplaced and I need to reactive i...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 57: Can you tell me how to link one of your cards that I already...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 58: How do I view the card I received in the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 59: Where on the website do I go to link my card?\n", + " 
Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 60: I found my card, can I add it to the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 61: How do I link to my credit card with you?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 62: I've received my card so now I need to know how to sync it t...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 63: I found my card, I would like to reactivate it.\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 64: How do I link this new card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 65: Can I reactivate my lost card that I found this morning in m...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 66: how do I link an already existing card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 67: The app doesn't show the card I received.\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 68: how do I link a card I already have?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 69: I would like to link my card. 
How do I do it?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 70: Can you please show me where I can find the location to link...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 71: Where do I go if I want to link my new card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 72: Is there a way to make my old card usable with the app?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 73: Can I reactivate a card I thought I lost?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 74: How do I link my card\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 75: Where do I need to go in the app to enter my card info?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 76: I have found my lost or stolen card. Is there a way I can li...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 77: Could you help me reactivate my card? 
It was previously lost...\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 78: I already have one of your cards, how do I link them?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 79: How do I link my replacement card?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[92m✓ 80: Can I link another card to my account?\n", + " Predicted: card_linking | Actual: card_linking\u001b[0m\n", + "\u001b[91m✗ 81: I need to know your exchange rates.\n", + " Predicted: exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 82: What exchange rates do you offer?\n", + " Predicted: currency_exchange | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 83: How did you come up with your exchange rates?\n", + " Predicted: currency_conversion | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 84: Where do you guys acquire your exchange rate?\n", + " Predicted: foreign_exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 85: How do I find the exchange rate?\n", + " Predicted: exchange_rate_arrival | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 86: What are your international exchange rates?\n", + " Predicted: currency_exchange | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 87: How often do your exchange rates change\n", + " Predicted: exchange_rate_change | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 88: Please advise what is the exchange rate\n", + " Predicted: currency_exchange | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 89: How are exchange rates calculated?\n", + " Predicted: currency_conversion | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 90: what are exchange rates based on\n", + " Predicted: currency_conversion | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 91: what are exchange rates\n", + " Predicted: exchange_rate_arrival | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 92: What are the most current exchange 
rates?\n", + " Predicted: exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 93: Can you explain your exchange rate policy to me?\n", + " Predicted: currency_conversion | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 94: Is it a good time to exchange?\n", + " Predicted: currency_exchange | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 95: What is the exchange rate like on this app?\n", + " Predicted: currency_conversion | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 96: Do you have a list of exchange rates?\n", + " Predicted: foreign_exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 97: Can you tell me where you get your exchange rates?\n", + " Predicted: exchange_rate_arrival | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 98: Will I get a curreng foreign exchange rate?\n", + " Predicted: currency_exchange | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 99: What currencies is an exchange rate calculated in?\n", + " Predicted: foreign_exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\u001b[91m✗ 100: Where do you get your exchange rates from?\n", + " Predicted: foreign_exchange_rates | Actual: exchange_rate\u001b[0m\n", + "\n", + "======================================================================\n", + "MODEL: Gpt Fine Tuned\n", + "TESTED: 100 examples\n", + "CORRECT: 80 (80.0%)\n", + "INCORRECT: 20\n", + "======================================================================\n", + "\n", + "Most Common Errors:\n", + " exchange_rate → currency_exchange: 5 times\n", + " exchange_rate → currency_conversion: 5 times\n", + " exchange_rate → foreign_exchange_rates: 4 times\n", + " exchange_rate → exchange_rate_arrival: 3 times\n", + " exchange_rate → exchange_rates: 2 times\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Run comprehensive evaluation on 100 test examples\n", + "from classifier_tester import ClassifierTester\n", + "\n", + "accuracy = ClassifierTester.test(gpt_fine_tuned, test_list, size=100)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}