Merge pull request #925 from hopeogbons/week6_exercise_hopeogbons
(Oct 2025 Bootcamp): Add week 6 exercise notebook for banking intent classification
This commit is contained in:
148
week6/community-contributions/hopeogbons/banking_intents.py
Normal file
148
week6/community-contributions/hopeogbons/banking_intents.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Banking77 Intent Mapping
|
||||
Maps label numbers (0-76) to intent names
|
||||
"""
|
||||
|
||||
# Intent names ordered so that each name's list index is its numeric label
# (index 0 -> "activate_my_card", ..., index 76 ->
# "wrong_exchange_rate_for_cash_withdrawal"). get_intent() and get_label()
# both rely on this ordering, so entries must not be reordered, renamed or
# removed.
# NOTE(review): "Refund_not_showing_up" (capital R) and
# "reverted_card_payment?" (trailing "?") look irregular; presumably they
# mirror the upstream dataset's label names verbatim -- confirm against the
# dataset before normalising them.
INTENT_LABELS = [
    "activate_my_card",
    "age_limit",
    "apple_pay_or_google_pay",
    "atm_support",
    "automatic_top_up",
    "balance_not_updated_after_bank_transfer",
    "balance_not_updated_after_cheque_or_cash_deposit",
    "beneficiary_not_allowed",
    "cancel_transfer",
    "card_about_to_expire",
    "card_acceptance",
    "card_arrival",
    "card_delivery_estimate",
    "card_linking",
    "card_not_working",
    "card_payment_fee_charged",
    "card_payment_not_recognised",
    "card_payment_wrong_exchange_rate",
    "card_swallowed",
    "cash_withdrawal_charge",
    "cash_withdrawal_not_recognised",
    "change_pin",
    "compromised_card",
    "contactless_not_working",
    "country_support",
    "declined_card_payment",
    "declined_cash_withdrawal",
    "declined_transfer",
    "direct_debit_payment_not_recognised",
    "disposable_card_limits",
    "edit_personal_details",
    "exchange_charge",
    "exchange_rate",
    "exchange_via_app",
    "extra_charge_on_statement",
    "failed_transfer",
    "fiat_currency_support",
    "get_disposable_virtual_card",
    "get_physical_card",
    "getting_spare_card",
    "getting_virtual_card",
    "lost_or_stolen_card",
    "lost_or_stolen_phone",
    "order_physical_card",
    "passcode_forgotten",
    "pending_card_payment",
    "pending_cash_withdrawal",
    "pending_top_up",
    "pending_transfer",
    "pin_blocked",
    "receiving_money",
    "Refund_not_showing_up",
    "request_refund",
    "reverted_card_payment?",
    "supported_cards_and_currencies",
    "terminate_account",
    "top_up_by_bank_transfer_charge",
    "top_up_by_card_charge",
    "top_up_by_cash_or_cheque",
    "top_up_failed",
    "top_up_limits",
    "top_up_reverted",
    "topping_up_by_card",
    "transaction_charged_twice",
    "transfer_fee_charged",
    "transfer_into_account",
    "transfer_not_received_by_recipient",
    "transfer_timing",
    "unable_to_verify_identity",
    "verify_my_identity",
    "verify_source_of_funds",
    "verify_top_up",
    "virtual_card_not_working",
    "visa_or_mastercard",
    "why_verify_identity",
    "wrong_amount_of_cash_received",
    "wrong_exchange_rate_for_cash_withdrawal"
]
|
||||
|
||||
|
||||
def get_intent(label_number):
    """
    Get intent name from label number.

    The valid range is derived from ``len(INTENT_LABELS)`` instead of a
    hard-coded upper bound, so the range check and the error message cannot
    drift out of sync with the list. Negative numbers are rejected explicitly
    because list indexing would otherwise interpret them as from-the-end
    positions and silently return the wrong intent.

    Args:
        label_number (int): Label from 0 to ``len(INTENT_LABELS) - 1``
            (0 to 76 for the 77 intents defined in this module)

    Returns:
        str: Intent name

    Raises:
        ValueError: If label_number is outside the valid range

    Example:
        >>> get_intent(0)
        'activate_my_card'
        >>> get_intent(25)
        'declined_card_payment'
    """
    max_label = len(INTENT_LABELS) - 1
    if not 0 <= label_number <= max_label:
        raise ValueError(
            f"Label must be between 0 and {max_label}, got {label_number}"
        )
    return INTENT_LABELS[label_number]
|
||||
|
||||
|
||||
def get_label(intent_name):
    """
    Get label number from intent name.

    The lookup is an exact, case-sensitive match against the entries of
    INTENT_LABELS; the returned number is the entry's position in that list.

    Args:
        intent_name (str): Intent name

    Returns:
        int: Label number (0-76)

    Raises:
        ValueError: If intent_name is not one of the known intents

    Example:
        >>> get_label('activate_my_card')
        0
        >>> get_label('declined_card_payment')
        25
    """
    try:
        return INTENT_LABELS.index(intent_name)
    except ValueError:
        # list.index's own message ("... is not in list") adds nothing to the
        # domain-specific one below, so suppress it to keep tracebacks short.
        raise ValueError(f"Intent '{intent_name}' not found in labels") from None
|
||||
|
||||
|
||||
# Quick access
|
||||
def show_all_intents():
    """Print one line per intent: the label number, a tab, then the intent name."""
    # Labels are simply the positions in INTENT_LABELS, so enumerate() yields
    # exactly the (label, name) pairs to display.
    rows = (f"{label}\t{name}" for label, name in enumerate(INTENT_LABELS))
    for row in rows:
        print(row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test, executed only when this module is run as a script:
    # prints a few label -> intent lookups followed by the reverse lookups.
    # Test the functions
    print("Testing get_intent:")
    print(f"Label 0: {get_intent(0)}")
    print(f"Label 25: {get_intent(25)}")
    # 76 is the highest label accepted by get_intent's range check.
    print(f"Label 76: {get_intent(76)}")

    # Reverse direction: intent name -> label number.
    print("\nTesting get_label:")
    print(f"'activate_my_card': {get_label('activate_my_card')}")
    print(f"'declined_card_payment': {get_label('declined_card_payment')}")
|
||||
|
||||
123
week6/community-contributions/hopeogbons/classifier_tester.py
Normal file
123
week6/community-contributions/hopeogbons/classifier_tester.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
Classification Tester for Banking Intent Model
|
||||
Evaluates model accuracy on intent classification
|
||||
"""
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import Counter
|
||||
from banking_intents import get_intent
|
||||
|
||||
# ANSI escape sequences used to colour the per-example console output:
# GREEN prefixes correct predictions, RED prefixes incorrect ones, and RESET
# is appended afterwards to restore the terminal's default style.
GREEN = "\033[92m"
RED = "\033[91m"
RESET = "\033[0m"
|
||||
|
||||
|
||||
class ClassifierTester:
    """
    Test framework for classification models.

    Runs a predictor over the first ``size`` examples of a dataset, compares
    each prediction with the intent name that get_intent() returns for the
    example's numeric label, prints a coloured line per example, and finally
    reports accuracy together with the most frequent confusion pairs.
    """

    def __init__(self, predictor, data, title=None, size=100):
        """
        Set up an evaluation run.

        Args:
            predictor: Callable that receives one example and returns the
                predicted intent name (compared by equality with the true
                intent name).
            data: Collection supporting ``len()`` and integer indexing; each
                example must provide ``item['text']`` and ``item['label']``.
            title: Display name for the report. Defaults to a title-cased
                version of the predictor's name.
            size: Maximum number of examples to evaluate; clamped to the
                range ``0..len(data)``.
        """
        self.predictor = predictor
        self.data = data
        # functools.partial objects and callable instances have no __name__,
        # so fall back to the type name instead of raising AttributeError.
        default_name = getattr(predictor, "__name__", type(predictor).__name__)
        self.title = title or default_name.replace("_", " ").title()
        # Clamp at zero so a negative size cannot yield a negative denominator.
        self.size = max(0, min(size, len(data)))
        self._reset()

    def _reset(self):
        """Clear all per-run results so an evaluation starts from scratch."""
        self.predictions = []
        self.actuals = []
        self.correct = 0
        self.incorrect = 0

    def _error_counts(self):
        """Return a Counter of (actual, predicted) pairs for every misclassification."""
        return Counter(
            (actual, pred)
            for pred, actual in zip(self.predictions, self.actuals)
            if pred != actual
        )

    def run_datapoint(self, i):
        """
        Test a single example.

        Calls the predictor on ``self.data[i]``, records the predicted and
        actual intent, updates the correct/incorrect counters and prints a
        coloured result line.

        Args:
            i (int): Index of the example in ``self.data``
        """
        item = self.data[i]

        # Get prediction
        predicted_intent = self.predictor(item)
        actual_intent = get_intent(item['label'])

        # Check if correct
        if predicted_intent == actual_intent:
            self.correct += 1
            color = GREEN
            status = "✓"
        else:
            self.incorrect += 1
            color = RED
            status = "✗"

        self.predictions.append(predicted_intent)
        self.actuals.append(actual_intent)

        # Print result (long queries are truncated to 60 characters)
        query = item['text'][:60] + "..." if len(item['text']) > 60 else item['text']
        print(f"{color}{status} {i+1}: {query}")
        print(f" Predicted: {predicted_intent} | Actual: {actual_intent}{RESET}")

    def chart(self):
        """
        Visualize top confusion pairs.

        Draws a horizontal bar chart of the ten most frequent
        (actual, predicted) mismatches; prints a message instead when there
        were no misclassifications.
        """
        # most_common() orders by count (descending), ties in first-seen order.
        top_errors = self._error_counts().most_common(10)

        if not top_errors:
            print("\n🎉 Perfect accuracy - no confusion to plot!")
            return

        labels = [f"{actual} → {pred}" for (actual, pred), _ in top_errors]
        counts = [count for _, count in top_errors]

        plt.figure(figsize=(12, 6))
        plt.barh(labels, counts, color='coral')
        plt.xlabel('Count')
        plt.title('Top 10 Confusion Pairs (Actual → Predicted)')
        plt.tight_layout()
        plt.show()

    def report(self):
        """
        Print final metrics and chart.

        Returns:
            float: Accuracy as a percentage of ``self.size``; 0.0 when no
            examples were selected for evaluation.
        """
        # An empty dataset (or size=0) would otherwise divide by zero.
        accuracy = (self.correct / self.size) * 100 if self.size else 0.0

        print("\n" + "="*70)
        print(f"MODEL: {self.title}")
        print(f"TESTED: {self.size} examples")
        print(f"CORRECT: {self.correct} ({accuracy:.1f}%)")
        print(f"INCORRECT: {self.incorrect}")
        print("="*70)

        # Show most common errors
        error_counts = self._error_counts()
        if error_counts:
            print("\nMost Common Errors:")
            for (actual, pred), count in error_counts.most_common(5):
                print(f" {actual} → {pred}: {count} times")

        # Chart
        self.chart()

        return accuracy

    def run(self):
        """
        Run the complete evaluation.

        Results from any previous run are discarded first, so calling run()
        repeatedly does not accumulate counts.

        Returns:
            float: Accuracy percentage as returned by report()
        """
        print(f"Testing {self.title} on {self.size} examples...\n")

        self._reset()
        for i in range(self.size):
            self.run_datapoint(i)

        return self.report()

    @classmethod
    def test(cls, function, data, size=100, title=None):
        """
        Convenience method to test a predictor function.

        Args:
            function: Predictor callable, as accepted by __init__
            data: Examples to evaluate, as accepted by __init__
            size: Maximum number of examples to evaluate
            title: Optional display name forwarded to __init__

        Returns:
            float: Accuracy percentage
        """
        return cls(function, data, title=title, size=size).run()
|
||||
|
||||
68
week6/community-contributions/hopeogbons/data_cleaner.py
Normal file
68
week6/community-contributions/hopeogbons/data_cleaner.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""
|
||||
Data cleaning utilities for dataset preparation
|
||||
"""
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def clean_dataset(data, min_length=10, max_samples_per_intent=None):
    """
    Clean and prepare dataset for fine-tuning

    Each example's text is stripped and has every run of whitespace collapsed
    to a single space. The minimum-length filter is applied to this
    normalized text, so examples padded with extra whitespace cannot slip
    past the filter and end up shorter than min_length. Only the 'text' and
    'label' fields are carried over to the output.

    Args:
        data: HuggingFace dataset or list of examples; each example must
            provide 'text' and 'label'
        min_length: Minimum normalized text length to keep (default: 10)
        max_samples_per_intent: Max samples per intent for balancing
            (default: None = no limit; 0 is also treated as no limit)

    Returns:
        list: Cleaned examples. Without balancing they keep the input order;
        with balancing they are grouped by label (labels in order of first
        appearance), each group keeping its first max_samples_per_intent
        examples.

    Example:
        >>> cleaned = clean_dataset(dataset['train'], min_length=10, max_samples_per_intent=200)
        >>> print(f"Cleaned {len(cleaned)} examples")
    """
    cleaned = []

    for example in data:
        # Normalize first: split()/join strips the ends and collapses internal
        # whitespace, so the length check below measures the stored text.
        text = ' '.join(example['text'].split())

        # Skip if too short
        if len(text) < min_length:
            continue

        cleaned.append({
            'text': text,
            'label': example['label']
        })

    if not max_samples_per_intent:
        return cleaned

    # Balance classes: bucket by label, then keep the first N of each bucket.
    by_label = defaultdict(list)
    for item in cleaned:
        by_label[item['label']].append(item)

    balanced = []
    for items in by_label.values():
        balanced.extend(items[:max_samples_per_intent])

    return balanced
|
||||
|
||||
|
||||
def analyze_distribution(data):
    """
    Count how many examples carry each label

    Args:
        data: Iterable of examples, each providing a 'label' field

    Returns:
        dict: Mapping of label -> number of occurrences (a Counter)
    """
    from collections import Counter

    distribution = Counter()
    for example in data:
        distribution[example['label']] += 1
    return distribution
|
||||
|
||||
855
week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb
Normal file
855
week6/community-contributions/hopeogbons/week6 EXERCISE.ipynb
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user