# -*- coding: utf-8 -*- """week8_exercie.ipynb Automatically generated by Colab. Original file is located at https://colab.research.google.com/drive/1jJ4pKoJat0ZnC99sTQjEEe9BMK--ArwQ """ !pip install -q pandas datasets matplotlib seaborn !pip install datasets==3.0.1 !pip install anthropic -q import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from datasets import load_dataset from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.linear_model import LogisticRegression #chec perfomance from sklearn.metrics import classification_report, confusion_matrix from sklearn.utils import resample import os from anthropic import Anthropic import re pd.set_option("display.max_colwidth", 100) # # Initialize client using environment variable # client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # # Quick test # print("Anthropic client initialized " if client else " Anthropic not detected.") from google.colab import userdata userdata.get('ANTHROPIC_API_KEY') api_key = userdata.get('ANTHROPIC_API_KEY') os.environ["ANTHROPIC_API_KEY"] = api_key client = Anthropic(api_key=api_key) # List models models = client.models.list() print("Available Anthropic Models:\n") for m in models.data: print(f"- {m.id}") #dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_Appliances", split="full[:5000]") # Loading a sample from the full reviews data dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_Appliances", split="full[:5000]") # creating a DF df = pd.DataFrame(dataset) df = df[["title", "text", "rating"]].dropna().reset_index(drop=True) # Renaming th columns for clarity/easy ref df.rename(columns={"text": "review_body"}, inplace=True) print(f"Loaded {len(df)} rows with reviews and ratings") df.head() #inspect the data # Basic info print(df.info()) print(df.isnull().sum()) # Unique ratings dist print(df["rating"].value_counts().sort_index()) 
# Check random reviews
display(df.sample(5, random_state=42))

# Review length distribution (word counts per review)
df["review_length"] = df["review_body"].apply(lambda x: len(str(x).split()))

# Summarize the review length
print(df["review_length"].describe())

# Plot the rating distribution
plt.figure(figsize=(6, 4))
df["rating"].hist(bins=5, edgecolor='black')
plt.title("Ratings Distribution (1–5 stars)")
plt.xlabel("Rating")
plt.ylabel("Number of Reviews")
plt.show()

# Review length
plt.figure(figsize=(6, 4))
df["review_length"].hist(bins=30, color="lightblue", edgecolor='black')
plt.title("Review Length Distribution")
plt.xlabel("Number of Words in Review")
plt.ylabel("Number of Reviews")
plt.show()


# Cleaning
def clean_text(text):
    """Lowercase *text*, strip URLs and punctuation, and collapse whitespace."""
    text = text.lower()
    # remove URLs
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    # remove punctuation/special chars
    text = re.sub(r"[^a-z0-9\s]", '', text)
    # normalize whitespace
    text = re.sub(r"\s+", ' ', text).strip()
    return text


df["clean_review"] = df["review_body"].apply(clean_text)
df.head(3)

"""'#sentiment analysis"""

# Rating labellings: 1-2 stars -> negative, 3 -> neutral, 4-5 -> positive
def label_sentiment(rating):
    """Map a 1–5 star rating to a coarse sentiment label."""
    if rating <= 2:
        return "negative"
    elif rating == 3:
        return "neutral"
    else:
        return "positive"


df["sentiment"] = df["rating"].apply(label_sentiment)
df["sentiment"].value_counts()

# Train/test split (stratified so the class proportions are preserved)
X_train, X_test, y_train, y_test = train_test_split(
    df["clean_review"], df["sentiment"],
    test_size=0.2, random_state=42, stratify=df["sentiment"]
)

print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")

# Convert text to TF-IDF features (unigrams + bigrams)
vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

print(f"TF-IDF matrix shape: {X_train_tfidf.shape}")

# Train a lightweight classifier
clf = LogisticRegression(max_iter=200)
clf.fit(X_train_tfidf, y_train)

y_pred = clf.predict(X_test_tfidf)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred)) sample_texts = [ "This blender broke after two days. Waste of money!", "Works exactly as described, very satisfied!", "It’s okay, does the job but nothing special." ] sample_features = vectorizer.transform(sample_texts) sample_preds = clf.predict(sample_features) for text, pred in zip(sample_texts, sample_preds): print(f"\nReview: {text}\nPredicted Sentiment: {pred}") """-->Positive reviews dominate, -->The model basically learned “always say positive”, -->Hence, 84% accuracy but 0 recall for negative/neutral — a fake good score. #Improving Model Balance & Realism """ # Separate by sentiment pos = df[df["sentiment"] == "positive"] neg = df[df["sentiment"] == "negative"] neu = df[df["sentiment"] == "neutral"] # Undersample positive to match roughly others pos_down = resample(pos, replace=False, n_samples=len(neg) + len(neu), random_state=42) # Combine df_balanced = pd.concat([pos_down, neg, neu]).sample(frac=1, random_state=42).reset_index(drop=True) print(df_balanced["sentiment"].value_counts()) #retain classfier X_train, X_test, y_train, y_test = train_test_split( df_balanced["clean_review"], df_balanced["sentiment"], test_size=0.2, random_state=42, stratify=df_balanced["sentiment"] ) vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1,2)) X_train_tfidf = vectorizer.fit_transform(X_train) X_test_tfidf = vectorizer.transform(X_test) clf = LogisticRegression(max_iter=300, class_weight="balanced") clf.fit(X_train_tfidf, y_train) print("Balanced model trained successfully ") #evaluate agan y_pred = clf.predict(X_test_tfidf) print("Classification Report:\n", classification_report(y_test, y_pred)) print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred)) """-->It now distinguishes between negative, neutral, and positive. -->Has a macro average F1 ≈ 0.57, which is fair for such a small, noisy sample. -->Shows that balancing worked — negatives are now detected correctly (recall 0.83). 
#Agents """ # Base class for all agents class BaseAgent: """A simple base agent with a name and a run() method.""" def __init__(self, name): self.name = name def run(self, *args, **kwargs): raise NotImplementedError("Subclasses must implement run() method.") def log(self, message): print(f"[{self.name}] {message}") #DataAgent for loading/cleaning class DataAgent(BaseAgent): """Handles dataset preparation tasks.""" def __init__(self, data): super().__init__("DataAgent") self.data = data def run(self): self.log("Preprocessing data...") df_clean = self.data.copy() df_clean["review_body"] = df_clean["review_body"].str.strip() df_clean.drop_duplicates(subset=["review_body"], inplace=True) self.log(f"Dataset ready with {len(df_clean)} reviews.") return df_clean #analisyis agent-->using the tianed sentiment model *TF-IDF +Logistic Regression) to classfy Reviews class AnalysisAgent(BaseAgent): """Analyzes text sentiment using a trained model.""" def __init__(self, vectorizer, model): super().__init__("AnalysisAgent") self.vectorizer = vectorizer self.model = model def run(self, reviews): self.log(f"Analyzing {len(reviews)} reviews...") X = self.vectorizer.transform(reviews) predictions = self.model.predict(X) return predictions #ReviewerAgent. Serves as the summary agnt using the anthropic API to give LLM review insights class ReviewerAgent(BaseAgent): """Summarizes overall sentiment trends using Anthropic Claude.""" def __init__(self): super().__init__("ReviewerAgent") # Retrieve your key once — it’s already stored in Colab userdata api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: from google.colab import userdata api_key = userdata.get("ANTHROPIC_API_KEY") if not api_key: raise ValueError("Anthropic API key not found. 
Make sure it's set in Colab userdata as 'ANTHROPIC_API_KEY'.") self.client = Anthropic(api_key=api_key) def run(self, summary_text): """Generate an insights summary using Claude.""" self.log("Generating summary using Claude...") prompt = f""" You are a product insights assistant. Based on the following summarized customer reviews, write a concise 3–4 sentence sentiment analysis report. Clearly describe the main themes and tone in user feedback on these home appliance products. Reviews Summary: {summary_text} """ response = self.client.messages.create( model="claude-3-5-haiku-20241022", # updated to latest available model max_tokens=250, temperature=0.6, messages=[{"role": "user", "content": prompt}] ) output = response.content[0].text.strip() self.log("Summary generated successfully ") return output # Instantiate agents data_agent = DataAgent(df) analysis_agent = AnalysisAgent(vectorizer, clf) reviewer_agent = ReviewerAgent() # Clean data df_ready = data_agent.run() # Classify sentiments df_ready["predicted_sentiment"] = analysis_agent.run(df_ready["review_body"]) # Prepare summary text by sentiment group summary_text = df_ready.groupby("predicted_sentiment")["review_body"].apply(lambda x: " ".join(x[:3])).to_string() # Generate AI summary using Anthropic insight_summary = reviewer_agent.run(summary_text) print(insight_summary) """#Evaluation & Visualization""" # Evaluation & Visualization === # Count predicted sentiments sentiment_counts = df_ready["predicted_sentiment"].value_counts() print(sentiment_counts) # Plot sentiment distribution plt.figure(figsize=(6,4)) sns.barplot(x=sentiment_counts.index, y=sentiment_counts.values, palette="viridis") plt.title("Sentiment Distribution of Reviews", fontsize=14) plt.xlabel("Sentiment") plt.ylabel("Number of Reviews") plt.show() # Compute average review length per sentiment df_ready["review_length"] = df_ready["review_body"].apply(lambda x: len(x.split())) avg_length = 
# Average review length per predicted sentiment.
# BUG FIX: the original split `avg_length = <expr>` across a physical line
# break; the assignment is restated here in full so this cell is valid.
avg_length = df_ready.groupby("predicted_sentiment")["review_length"].mean()
print(avg_length)

# Visualize it.
# NOTE: seaborn deprecated passing `palette` without `hue`; assigning the x
# variable to `hue` with legend=False produces the same chart.
plt.figure(figsize=(6, 4))
sns.barplot(x=avg_length.index, y=avg_length.values,
            hue=avg_length.index, palette="coolwarm", legend=False)
plt.title("Average Review Length per Sentiment")
plt.xlabel("Sentiment")
plt.ylabel("Average Word Count")
plt.show()