"""LLM sentiment analysis and summarization utilities."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
|
|
|
try: # pragma: no cover - optional dependency
|
|
from openai import OpenAI
|
|
except ModuleNotFoundError: # pragma: no cover
|
|
OpenAI = None # type: ignore[assignment]
|
|
|
|
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
|
|
|
from .utils import ServiceWarning, chunked
|
|
|
|
CLASSIFICATION_SYSTEM_PROMPT = "You are a precise brand-sentiment classifier. Output JSON only."
|
|
SUMMARY_SYSTEM_PROMPT = "You analyze brand chatter and produce concise, executive-ready summaries."
|
|
|
|
|
|
@dataclass
class SentimentResult:
    """Structured sentiment output."""

    label: str
    confidence: float


class LLMService:
    """Wrapper around OpenAI with VADER fallback."""

    def __init__(self, api_key: Optional[str], model: str = "gpt-4o-mini", batch_size: int = 20):
        self.batch_size = max(1, batch_size)
        self.model = model
        self.logger = logging.getLogger("services.llm")
        self._client: Optional[Any] = None
        self._analyzer = SentimentIntensityAnalyzer()
        if api_key and OpenAI is not None:
            try:
                self._client = OpenAI(api_key=api_key)
            except Exception as exc:  # noqa: BLE001
                self.logger.warning("Failed to initialize OpenAI client, using VADER fallback: %s", exc)
                self._client = None
        elif api_key and OpenAI is None:
            self.logger.warning("openai package not installed; falling back to VADER despite API key.")

    def available(self) -> bool:
        """Return whether OpenAI-backed features are available."""
        return self._client is not None

    def classify_sentiment_batch(self, texts: Sequence[str]) -> List[SentimentResult]:
        """Classify multiple texts, chunking if necessary."""
        if not texts:
            return []
        if not self.available():
            return [self._vader_sentiment(text) for text in texts]

        results: List[SentimentResult] = []
        for chunk in chunked(list(texts), self.batch_size):
            prompt_lines = [
                'Classify each item as "positive", "neutral", or "negative".',
                "Also output a confidence score between 0 and 1.",
                'Return an array of objects: [{"label": "...", "confidence": 0.0}].',
                "Items:",
            ]
            prompt_lines.extend([f"{idx + 1}) {text}" for idx, text in enumerate(chunk)])
            prompt = "\n".join(prompt_lines)
            try:
                response = self._client.responses.create(  # type: ignore[union-attr]
                    model=self.model,
                    input=[
                        {"role": "system", "content": CLASSIFICATION_SYSTEM_PROMPT},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0,
                    max_output_tokens=500,
                )
                output_text = self._extract_text(response)
                parsed = json.loads(output_text)
                for item in parsed:
                    results.append(
                        SentimentResult(
                            # Normalize casing so model output matches the VADER fallback labels.
                            label=str(item.get("label", "neutral")).strip().lower(),
                            confidence=float(item.get("confidence", 0.5)),
                        )
                    )
            except Exception as exc:  # noqa: BLE001
                self.logger.warning("Classification fallback to VADER due to error: %s", exc)
                for text in chunk:
                    results.append(self._vader_sentiment(text))
        # Ensure the output length matches the input: truncate extras if the model
        # returned too many items, pad any shortfall with low-confidence neutrals.
        if len(results) > len(texts):
            results = results[: len(texts)]
        elif len(results) < len(texts):
            results.extend(
                [SentimentResult(label="neutral", confidence=0.33)] * (len(texts) - len(results))
            )
        return results

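    # Illustrative JSON contract for classify_sentiment_batch (example values, not from
    # the original source): with batch_size=2, a call such as
    #     classify_sentiment_batch(["Love it", "Meh", "Broken again"])
    # is split into two prompts (items 1-2, then item 3), and each response is expected
    # to parse as an array like
    #     [{"label": "positive", "confidence": 0.92}, {"label": "neutral", "confidence": 0.55}]
    # Any API or parsing error downgrades that whole chunk to VADER scoring.
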
    def summarize_overall(self, findings: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Create an executive summary using OpenAI."""
        if not self.available():
            raise ServiceWarning("OpenAI API key missing. Summary unavailable.")
        prompt_lines = [
            "Given these labeled items and their short rationales, write:",
            '- 5 bullet "Highlights"',
            '- 5 bullet "Risks & Concerns"',
            '- One-line "Overall Tone" (Positive/Neutral/Negative with brief justification)',
            '- 3 "Recommended Actions"',
            "Keep it under 180 words total. Be specific but neutral in tone.",
            "Items:",
        ]
        for idx, item in enumerate(findings, start=1):
            prompt_lines.append(
                f"{idx}) [{item.get('label', 'neutral').upper()}] {item.get('text', '')}"
            )
        prompt = "\n".join(prompt_lines)
        try:
            response = self._client.responses.create(  # type: ignore[union-attr]
                model=self.model,
                input=[
                    {"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.2,
                max_output_tokens=800,
            )
            output_text = self._extract_text(response)
            return {"raw": output_text}
        except Exception as exc:  # noqa: BLE001
            self.logger.error("Failed to generate summary: %s", exc)
            raise ServiceWarning("Unable to generate executive summary at this time.") from exc

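    # Illustrative input shape for summarize_overall (assumed from how the prompt is
    # built above; the example records are invented):
    #     findings = [
    #         {"label": "negative", "text": "Checkout keeps failing on mobile."},
    #         {"label": "positive", "text": "Support resolved my issue in minutes."},
    #     ]
    #     summary = llm_service.summarize_overall(findings)  # -> {"raw": "<summary text>"}
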
    def _vader_sentiment(self, text: str) -> SentimentResult:
        # VADER's compound score lies in [-1, 1]; values within 0.2 of zero are treated
        # as neutral here, and |compound| doubles as a rough confidence.
        scores = self._analyzer.polarity_scores(text)
        compound = scores["compound"]
        if compound >= 0.2:
            label = "positive"
        elif compound <= -0.2:
            label = "negative"
        else:
            label = "neutral"
        confidence = min(1.0, max(0.0, abs(compound)))
        return SentimentResult(label=label, confidence=confidence)

    def _extract_text(self, response: Any) -> str:
        """Support multiple OpenAI client response shapes."""
        # Recent openai clients expose an aggregated ``output_text`` convenience property.
        output_text = getattr(response, "output_text", None)
        if output_text:
            return output_text
        # Responses API shape: first output item -> first content block -> text.
        if hasattr(response, "output") and response.output:
            content = response.output[0].content[0]
            return getattr(content, "text", str(content))
        # Chat Completions-style shape.
        if hasattr(response, "choices"):
            return response.choices[0].message.content  # type: ignore[return-value]
        raise ValueError("Unknown response structure from OpenAI client.")
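

# Usage sketch (illustrative only, not part of the original module). Because of the
# relative import of ``.utils`` above, run it as a module, e.g. ``python -m <package>.llm``;
# the package path is an assumption.
if __name__ == "__main__":  # pragma: no cover
    import os

    logging.basicConfig(level=logging.INFO)
    # With no API key (or without the openai package) the service quietly falls back to VADER.
    service = LLMService(api_key=os.getenv("OPENAI_API_KEY"), batch_size=10)
    sample = [
        "Absolutely love the new checkout flow!",
        "Order arrived late and support never replied.",
        "It's fine, nothing special.",
    ]
    results = service.classify_sentiment_batch(sample)
    for text, result in zip(sample, results):
        print(f"{result.label:>8} ({result.confidence:.2f})  {text}")
    try:
        summary = service.summarize_overall(
            [{"label": r.label, "text": t} for r, t in zip(results, sample)]
        )
        print(summary["raw"])
    except ServiceWarning as warn:
        print(f"Summary unavailable: {warn}")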