Merge pull request #925 from hopeogbons/week6_exercise_hopeogbons

(Oct 2025 Bootcamp): Add week 6 exercise notebook for banking intent classification
Ed Donner authored on 2025-10-30 22:34:06 -04:00, committed by GitHub
4 changed files with 1194 additions and 0 deletions


@@ -0,0 +1,148 @@
"""
Banking77 Intent Mapping
Maps label numbers (0-76) to intent names
"""
INTENT_LABELS = [
    "activate_my_card",
    "age_limit",
    "apple_pay_or_google_pay",
    "atm_support",
    "automatic_top_up",
    "balance_not_updated_after_bank_transfer",
    "balance_not_updated_after_cheque_or_cash_deposit",
    "beneficiary_not_allowed",
    "cancel_transfer",
    "card_about_to_expire",
    "card_acceptance",
    "card_arrival",
    "card_delivery_estimate",
    "card_linking",
    "card_not_working",
    "card_payment_fee_charged",
    "card_payment_not_recognised",
    "card_payment_wrong_exchange_rate",
    "card_swallowed",
    "cash_withdrawal_charge",
    "cash_withdrawal_not_recognised",
    "change_pin",
    "compromised_card",
    "contactless_not_working",
    "country_support",
    "declined_card_payment",
    "declined_cash_withdrawal",
    "declined_transfer",
    "direct_debit_payment_not_recognised",
    "disposable_card_limits",
    "edit_personal_details",
    "exchange_charge",
    "exchange_rate",
    "exchange_via_app",
    "extra_charge_on_statement",
    "failed_transfer",
    "fiat_currency_support",
    "get_disposable_virtual_card",
    "get_physical_card",
    "getting_spare_card",
    "getting_virtual_card",
    "lost_or_stolen_card",
    "lost_or_stolen_phone",
    "order_physical_card",
    "passcode_forgotten",
    "pending_card_payment",
    "pending_cash_withdrawal",
    "pending_top_up",
    "pending_transfer",
    "pin_blocked",
    "receiving_money",
    "Refund_not_showing_up",
    "request_refund",
    "reverted_card_payment?",
    "supported_cards_and_currencies",
    "terminate_account",
    "top_up_by_bank_transfer_charge",
    "top_up_by_card_charge",
    "top_up_by_cash_or_cheque",
    "top_up_failed",
    "top_up_limits",
    "top_up_reverted",
    "topping_up_by_card",
    "transaction_charged_twice",
    "transfer_fee_charged",
    "transfer_into_account",
    "transfer_not_received_by_recipient",
    "transfer_timing",
    "unable_to_verify_identity",
    "verify_my_identity",
    "verify_source_of_funds",
    "verify_top_up",
    "virtual_card_not_working",
    "visa_or_mastercard",
    "why_verify_identity",
    "wrong_amount_of_cash_received",
    "wrong_exchange_rate_for_cash_withdrawal"
]
def get_intent(label_number):
    """
    Get intent name from label number.

    Args:
        label_number (int): Label from 0 to 76

    Returns:
        str: Intent name

    Example:
        >>> get_intent(0)
        'activate_my_card'
        >>> get_intent(25)
        'declined_card_payment'
    """
    if 0 <= label_number <= 76:
        return INTENT_LABELS[label_number]
    else:
        raise ValueError(f"Label must be between 0 and 76, got {label_number}")


def get_label(intent_name):
    """
    Get label number from intent name.

    Args:
        intent_name (str): Intent name

    Returns:
        int: Label number (0-76)

    Example:
        >>> get_label('activate_my_card')
        0
        >>> get_label('declined_card_payment')
        25
    """
    try:
        return INTENT_LABELS.index(intent_name)
    except ValueError:
        raise ValueError(f"Intent '{intent_name}' not found in labels")


# Quick access
def show_all_intents():
    """Display all 77 intents with their labels"""
    for i, intent in enumerate(INTENT_LABELS):
        print(f"{i}\t{intent}")


if __name__ == "__main__":
    # Test the functions
    print("Testing get_intent:")
    print(f"Label 0: {get_intent(0)}")
    print(f"Label 25: {get_intent(25)}")
    print(f"Label 76: {get_intent(76)}")

    print("\nTesting get_label:")
    print(f"'activate_my_card': {get_label('activate_my_card')}")
    print(f"'declined_card_payment': {get_label('declined_card_payment')}")


@@ -0,0 +1,123 @@
"""
Classification Tester for Banking Intent Model
Evaluates model accuracy on intent classification
"""
import matplotlib.pyplot as plt
from collections import Counter
from banking_intents import get_intent
GREEN = "\033[92m"
RED = "\033[91m"
RESET = "\033[0m"
class ClassifierTester:
    """Test framework for classification models"""

    def __init__(self, predictor, data, title=None, size=100):
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = min(size, len(data))
        self.predictions = []
        self.actuals = []
        self.correct = 0
        self.incorrect = 0

    def run_datapoint(self, i):
        """Test a single example"""
        item = self.data[i]
        # Get prediction
        predicted_intent = self.predictor(item)
        actual_intent = get_intent(item['label'])
        # Check if correct
        is_correct = predicted_intent == actual_intent
        if is_correct:
            self.correct += 1
            color = GREEN
            status = "✓"
        else:
            self.incorrect += 1
            color = RED
            status = "✗"
        self.predictions.append(predicted_intent)
        self.actuals.append(actual_intent)
        # Print result
        query = item['text'][:60] + "..." if len(item['text']) > 60 else item['text']
        print(f"{color}{status} {i+1}: {query}")
        print(f"   Predicted: {predicted_intent} | Actual: {actual_intent}{RESET}")

    def chart(self):
        """Visualize top confusion pairs"""
        # Find misclassifications
        errors = {}
        for pred, actual in zip(self.predictions, self.actuals):
            if pred != actual:
                pair = f"{actual} → {pred}"
                errors[pair] = errors.get(pair, 0) + 1
        if not errors:
            print("\n🎉 Perfect accuracy - no confusion to plot!")
            return
        # Plot top 10 confusions
        top_errors = sorted(errors.items(), key=lambda x: x[1], reverse=True)[:10]
        labels = [pair for pair, _ in top_errors]
        counts = [count for _, count in top_errors]
        plt.figure(figsize=(12, 6))
        plt.barh(labels, counts, color='coral')
        plt.xlabel('Count')
        plt.title('Top 10 Confusion Pairs (Actual → Predicted)')
        plt.tight_layout()
        plt.show()

    def report(self):
        """Print final metrics and chart"""
        accuracy = (self.correct / self.size) * 100
        print("\n" + "="*70)
        print(f"MODEL: {self.title}")
        print(f"TESTED: {self.size} examples")
        print(f"CORRECT: {self.correct} ({accuracy:.1f}%)")
        print(f"INCORRECT: {self.incorrect}")
        print("="*70)
        # Show most common errors
        if self.incorrect > 0:
            print("\nMost Common Errors:")
            error_pairs = [(self.actuals[i], self.predictions[i])
                           for i in range(len(self.actuals))
                           if self.actuals[i] != self.predictions[i]]
            error_counts = Counter(error_pairs).most_common(5)
            for (actual, pred), count in error_counts:
                print(f"  {actual} → {pred}: {count} times")
        # Chart
        self.chart()
        return accuracy

    def run(self):
        """Run the complete evaluation"""
        print(f"Testing {self.title} on {self.size} examples...\n")
        for i in range(self.size):
            self.run_datapoint(i)
        return self.report()

    @classmethod
    def test(cls, function, data, size=100):
        """Convenience method to test a predictor function"""
        return cls(function, data, size=size).run()
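
Usage note (illustrative, not part of the commit): the harness only assumes the predictor is a callable that takes one {'text', 'label'} dict and returns an intent-name string, so even a constant baseline exercises it end to end. The `test_data` name below is a hypothetical stand-in for a list of such dicts (e.g. the output of clean_dataset in the next file).

def constant_baseline(item):
    # Hypothetical baseline: ignore item['text'] and always guess one intent.
    # A real predictor would run a fine-tuned model on the text instead.
    return "card_arrival"

accuracy = ClassifierTester.test(constant_baseline, test_data, size=50)
print(f"Baseline accuracy: {accuracy:.1f}%")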


@@ -0,0 +1,68 @@
"""
Data cleaning utilities for dataset preparation
"""
from collections import Counter, defaultdict


def clean_dataset(data, min_length=10, max_samples_per_intent=None):
    """
    Clean and prepare dataset for fine-tuning

    Args:
        data: HuggingFace dataset or list of examples
        min_length: Minimum text length to keep (default: 10)
        max_samples_per_intent: Max samples per intent for balancing (default: None = no limit)

    Returns:
        list: Cleaned examples

    Example:
        >>> cleaned = clean_dataset(dataset['train'], min_length=10, max_samples_per_intent=200)
        >>> print(f"Cleaned {len(cleaned)} examples")
    """
    cleaned = []
    for example in data:
        text = example['text'].strip()
        # Skip if too short
        if len(text) < min_length:
            continue
        # Normalize text - remove extra whitespace
        text = ' '.join(text.split())
        cleaned.append({
            'text': text,
            'label': example['label']
        })

    # Balance classes if max_samples_per_intent is specified
    if max_samples_per_intent:
        balanced = defaultdict(list)
        for item in cleaned:
            balanced[item['label']].append(item)
        cleaned = []
        for label, items in balanced.items():
            cleaned.extend(items[:max_samples_per_intent])

    return cleaned


def analyze_distribution(data):
    """
    Analyze label distribution in dataset

    Args:
        data: List of examples with 'label' field

    Returns:
        dict: Label counts
    """
    labels = [item['label'] for item in data]
    return Counter(labels)
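
Usage note (illustrative): chaining the two helpers to build a balanced fine-tuning set, assuming `dataset` was loaded as in the banking_intents sketch above.

cleaned = clean_dataset(dataset["train"], min_length=10, max_samples_per_intent=200)
distribution = analyze_distribution(cleaned)
print(f"{len(cleaned)} examples across {len(distribution)} intents")
print("Largest class:", max(distribution.values()), "examples")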

Fourth file (the week 6 exercise notebook): diff suppressed because one or more lines are too long.