Merge pull request #949 from Mogbeyi/emmy/week8-solution
LLM Debate Arena
week8/community_contributions/emmy/llm_battle.py (new file, 453 lines)
@@ -0,0 +1,453 @@
from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from typing import Dict, Generator, List, Optional, Tuple

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()


# ---------------------------------------------------------------------------
# Configuration helpers
# ---------------------------------------------------------------------------


@dataclass
class AgentConfig:
    """Holds configuration required to talk to an LLM provider."""

    name: str
    model: str
    api_key_env: str
    base_url_env: Optional[str] = None
    temperature: float = 0.7
    supports_json: bool = True


def load_client(config: AgentConfig) -> OpenAI:
    """Create an OpenAI-compatible client for the given agent."""
    api_key = os.getenv(config.api_key_env) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            f"Missing API key for {config.name}. "
            f"Set {config.api_key_env} or OPENAI_API_KEY."
        )

    base_url = (
        os.getenv(config.base_url_env)
        if config.base_url_env
        else os.getenv("OPENAI_BASE_URL")
    )

    return OpenAI(api_key=api_key, base_url=base_url)
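

# Usage sketch (illustrative only, not wired into the app): pointing an agent at
# a local Ollama server just means setting the env vars its config names, since
# load_client falls back to OPENAI_API_KEY / OPENAI_BASE_URL when they are unset.
# The model name and URL below are assumptions, not project defaults:
#
#   os.environ.setdefault("OLLAMA_API_KEY", "ollama")
#   os.environ.setdefault("OLLAMA_BASE_URL", "http://localhost:11434/v1")
#   local_agent = AgentConfig(
#       name="Local Agent",
#       model="llama3.1",
#       api_key_env="OLLAMA_API_KEY",
#       base_url_env="OLLAMA_BASE_URL",
#   )
#   client = load_client(local_agent)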


def extract_text(response) -> str:
    """Extract text content from an OpenAI-style response object or dict."""
    choices = getattr(response, "choices", None)
    if choices is None and isinstance(response, dict):
        choices = response.get("choices")
    if not choices:
        raise RuntimeError(f"LLM response missing choices field: {response!r}")

    choice = choices[0]
    message = getattr(choice, "message", None)
    if message is None and isinstance(choice, dict):
        message = choice.get("message")

    content = None
    if message is not None:
        content = getattr(message, "content", None)
        if content is None and isinstance(message, dict):
            content = message.get("content")

    # Some providers return content as a list of typed parts; join them.
    if isinstance(content, list):
        parts: List[str] = []
        for part in content:
            if isinstance(part, dict):
                if "text" in part:
                    parts.append(str(part["text"]))
                elif "output_text" in part:
                    parts.append(str(part["output_text"]))
                elif "type" in part and "content" in part:
                    parts.append(str(part["content"]))
            else:
                parts.append(str(part))
        content = "".join(parts)

    # Legacy completions-style responses expose `text` on the choice instead.
    if content is None:
        text = getattr(choice, "text", None)
        if text is None and isinstance(choice, dict):
            text = choice.get("text")
        if text:
            content = text

    if content is None:
        raise RuntimeError(f"LLM response missing content/text: {response!r}")

    return str(content).strip()
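

# Quick illustration (assumed response shape): extract_text accepts plain dicts
# as well as SDK objects, and joins list-style content parts.
#
#   fake = {"choices": [{"message": {"content": [{"type": "text", "text": "Hi"}]}}]}
#   assert extract_text(fake) == "Hi"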


# Default configuration leverages OpenAI unless overrides are provided.
DEBATER_A_CONFIG = AgentConfig(
    name="Debater A",
    model=os.getenv("DEBATER_A_MODEL", "gpt-4o"),
    api_key_env="OPENAI_API_KEY",
    base_url_env="OPENAI_BASE_URL",
    temperature=float(os.getenv("DEBATER_A_TEMPERATURE", "0.7")),
)

DEBATER_B_CONFIG = AgentConfig(
    name="Debater B",
    model=os.getenv("DEBATER_B_MODEL", "gemini-2.0-flash"),
    api_key_env="GOOGLE_API_KEY",
    base_url_env="GEMINI_BASE_URL",
    temperature=float(os.getenv("DEBATER_B_TEMPERATURE", "0.7")),
)

JUDGE_CONFIG = AgentConfig(
    name="Judge",
    model=os.getenv("JUDGE_MODEL", "gpt-oss:20b-cloud"),
    api_key_env="OLLAMA_API_KEY",
    base_url_env="OLLAMA_BASE_URL",
    temperature=float(os.getenv("JUDGE_TEMPERATURE", "0.2")),
    supports_json=False,
)

REPORTER_CONFIG = AgentConfig(
    name="Reporter",
    model=os.getenv("REPORTER_MODEL", "MiniMax-M2"),
    api_key_env="MINIMAX_API_KEY",
    base_url_env="MINIMAX_BASE_URL",
    temperature=float(os.getenv("REPORTER_TEMPERATURE", "0.4")),
    supports_json=False,
)
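
# Every agent's model and temperature can be overridden per environment, e.g.
# (example values, shell syntax):
#
#   export DEBATER_A_MODEL=gpt-4o-mini
#   export JUDGE_TEMPERATURE=0.0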

THEME = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="sky",
    neutral_hue="gray",
)

CUSTOM_CSS = """
body, .gradio-container {
    background: radial-gradient(circle at top, #0f172a 0%, #020617 60%, #020617 100%);
    color: #e2e8f0;
}
#live-debate-panel {
    background: linear-gradient(135deg, rgba(30,64,175,0.95), rgba(29,78,216,0.85));
    color: #f8fafc;
    border-radius: 16px;
    padding: 24px;
    box-shadow: 0 20px 45px rgba(15,23,42,0.35);
}
#live-debate-panel h3 {
    color: #bfdbfe;
}
.gr-button-primary {
    background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
    border: none !important;
}
.gr-button-primary:hover {
    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
}
"""


# ---------------------------------------------------------------------------
# Debate runtime classes
# ---------------------------------------------------------------------------


@dataclass
class DebateState:
    topic: str
    stance_a: str
    stance_b: str
    transcript: List[Tuple[str, str]] = field(default_factory=list)


class LLMAdapter:
    """Thin wrapper around the OpenAI SDK to simplify prompting."""

    def __init__(self, config: AgentConfig):
        self.config = config
        self.client = load_client(config)

    def complete(
        self,
        prompt: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 512,
        json_mode: bool = False,
    ) -> str:
        messages: List[Dict[str, str]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        params = dict(
            model=self.config.model,
            messages=messages,
            temperature=self.config.temperature,
            max_tokens=max_tokens,
        )
        # Only request JSON mode from providers known to support it.
        if json_mode and self.config.supports_json:
            params["response_format"] = {"type": "json_object"}

        response = self.client.chat.completions.create(**params)
        return extract_text(response)
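

# Minimal adapter call, for orientation (sketch; prompt text is illustrative):
#
#   adapter = LLMAdapter(DEBATER_A_CONFIG)
#   reply = adapter.complete("Say hello in one sentence.", system="Be brief.")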


class Debater:
    def __init__(self, adapter: LLMAdapter, stance_label: str):
        self.adapter = adapter
        self.stance_label = stance_label

    def argue(self, topic: str) -> str:
        prompt = (
            f"You are {self.adapter.config.name}, debating the topic:\n"
            f"'{topic}'.\n\n"
            f"Present a concise argument that {self.stance_label.lower()} "
            f"the statement. Use at most 150 words. Provide clear reasoning "
            f"and, if applicable, cite plausible evidence or examples."
        )
        return self.adapter.complete(prompt, max_tokens=300)


class Judge:
    RUBRIC = [
        "Clarity of the argument",
        "Use of evidence or examples",
        "Logical coherence",
        "Persuasiveness and impact",
    ]

    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def evaluate(self, topic: str, argument_a: str, argument_b: str) -> Dict[str, object]:
        rubric_text = "\n".join(f"- {item}" for item in self.RUBRIC)
        prompt = (
            "You are serving as an impartial debate judge.\n"
            f"Topic: {topic}\n\n"
            f"Argument from Debater A:\n{argument_a}\n\n"
            f"Argument from Debater B:\n{argument_b}\n\n"
            "Score each debater from 0-10 on the following criteria:\n"
            f"{rubric_text}\n\n"
            "Return a JSON object with this exact structure:\n"
            "{\n"
            '  "winner": "A" or "B" or "Tie",\n'
            '  "reason": "brief justification",\n'
            '  "scores": [\n'
            '    {"criterion": "...", "debater_a": 0-10, "debater_b": 0-10, "notes": "optional"}\n'
            "  ]\n"
            "}\n"
            "Ensure the JSON is valid."
        )
        raw = self.adapter.complete(prompt, max_tokens=400, json_mode=True)
        # The judge model may not honour JSON mode (supports_json=False), so
        # strip any markdown code fences before attempting to parse.
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.startswith("json"):
                cleaned = cleaned[4:]
        try:
            data = json.loads(cleaned)
            if "scores" not in data:
                raise ValueError("scores missing")
            return data
        except Exception:
            # Fallback: wrap the raw text if parsing fails.
            return {"winner": "Unknown", "reason": raw, "scores": []}
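

# A well-formed judge reply therefore parses to something like (values are
# illustrative):
#
#   {"winner": "A", "reason": "Stronger evidence.",
#    "scores": [{"criterion": "Clarity of the argument",
#                "debater_a": 8, "debater_b": 6, "notes": ""}]}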


class Reporter:
    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def summarize(
        self,
        topic: str,
        argument_a: str,
        argument_b: str,
        judge_result: Dict[str, object],
    ) -> str:
        prompt = (
            f"Summarize a single-round debate on '{topic}'.\n\n"
            f"Debater A argued:\n{argument_a}\n\n"
            f"Debater B argued:\n{argument_b}\n\n"
            f"Judge verdict: {json.dumps(judge_result, ensure_ascii=False)}\n\n"
            "Provide a short journalistic summary (max 200 words) highlighting "
            "each side's key points and the judge's decision. Use a neutral tone."
        )
        response = self.adapter.client.chat.completions.create(
            model=self.adapter.config.model,
            messages=[
                {"role": "system", "content": "You are an impartial debate reporter."},
                {"role": "user", "content": prompt},
            ],
            temperature=self.adapter.config.temperature,
            max_tokens=300,
            # Pass the provider-specific reasoning_split flag only when the
            # base URL looks like a MiniMax endpoint.
            **(
                {"extra_body": {"reasoning_split": True}}
                if getattr(self.adapter.client, "base_url", None)
                and "minimax" in str(self.adapter.client.base_url).lower()
                else {}
            ),
        )
        return extract_text(response)


# ---------------------------------------------------------------------------
# Debate pipeline + UI
# ---------------------------------------------------------------------------


debater_a = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
debater_b = Debater(LLMAdapter(DEBATER_B_CONFIG), stance_label="opposes")
judge = Judge(LLMAdapter(JUDGE_CONFIG))
reporter = Reporter(LLMAdapter(REPORTER_CONFIG))


def format_transcript(transcript: List[Tuple[str, str]]) -> str:
    """Return a markdown-formatted transcript."""
    lines = []
    for speaker, message in transcript:
        lines.append(f"### {speaker}\n\n{message}\n")
    return "\n".join(lines)


def run_debate(
    topic: str, stance_a: str, stance_b: str
) -> Generator[Tuple[str, str, List[List[object]], str, str], None, None]:
    """Generator for Gradio to stream debate progress."""
    if not topic.strip():
        warning = "⚠️ Please enter a debate topic to get started."
        yield warning, "", [], "", ""
        return

    state = DebateState(topic=topic.strip(), stance_a=stance_a, stance_b=stance_b)

    state.transcript.append(
        ("Moderator", f"Welcome to the debate on **{state.topic}**!")
    )
    yield format_transcript(state.transcript), "Debaters preparing arguments...", [], "", ""

    argument_a = debater_a.argue(state.topic)
    state.transcript.append((f"Debater A ({state.stance_a})", argument_a))
    yield format_transcript(state.transcript), "Collecting arguments...", [], "", ""

    argument_b = debater_b.argue(state.topic)
    state.transcript.append((f"Debater B ({state.stance_b})", argument_b))
    yield format_transcript(state.transcript), "Judge deliberating...", [], "", ""

    judge_result = judge.evaluate(state.topic, argument_a, argument_b)
    verdict_text = (
        f"Winner: {judge_result.get('winner', 'Unknown')}\nReason: "
        f"{judge_result.get('reason', 'No explanation provided.')}"
    )
    score_rows = [
        [
            entry.get("criterion", ""),
            entry.get("debater_a", ""),
            entry.get("debater_b", ""),
            entry.get("notes", ""),
        ]
        for entry in judge_result.get("scores", [])
    ]
    judge_report_md = (
        f"**Judge Verdict:** {judge_result.get('winner', 'Unknown')}\n\n"
        f"{judge_result.get('reason', '')}"
    )
    yield (
        format_transcript(state.transcript),
        judge_report_md,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )

    reporter_summary = reporter.summarize(
        state.topic, argument_a, argument_b, judge_result
    )

    final_markdown = (
        f"{judge_report_md}\n\n---\n\n"
        f"**Reporter Summary**\n\n{reporter_summary}"
    )
    yield (
        format_transcript(state.transcript),
        final_markdown,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )
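

# run_debate is a plain generator, so it can be smoke-tested without the UI
# (sketch; topic and stances are illustrative):
#
#   for transcript, report, rows, verdict, copy_text in run_debate(
#       "Remote work should be the default", "Supports", "Opposes"
#   ):
#       print(report or "streaming...")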


# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------


with gr.Blocks(
    title="LLM Debate Arena",
    fill_width=True,
    theme=THEME,
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(
        "# 🔁 LLM Debate Arena\n"
        "Configure two debating agents, watch their arguments in real time, and "
        "review the judge's verdict plus a reporter summary."
    )

    with gr.Row():
        topic_input = gr.Textbox(
            label="Debate Topic",
            placeholder="e.g., Should autonomous delivery robots be allowed in city centers?",
        )
    with gr.Row():
        stance_a_input = gr.Textbox(
            label="Debater A Stance",
            value="Supports the statement",
        )
        stance_b_input = gr.Textbox(
            label="Debater B Stance",
            value="Opposes the statement",
        )

    run_button = gr.Button("Start Debate", variant="primary")

    with gr.Tab("Live Debate"):
        transcript_md = gr.Markdown(
            "### Waiting for the debate to start...",
            elem_id="live-debate-panel",
        )

    with gr.Tab("Judge's Report"):
        judge_md = gr.Markdown("Judge verdict will appear here.")
        score_table = gr.Dataframe(
            headers=["Criterion", "Debater A", "Debater B", "Notes"],
            datatype=["str", "number", "number", "str"],
            interactive=False,
        )
        verdict_box = gr.Textbox(
            label="Verdict Detail",
            interactive=False,
        )
        transcript_box = gr.Textbox(
            label="Full Transcript (for copying)",
            interactive=False,
            lines=10,
        )

    run_button.click(
        fn=run_debate,
        inputs=[topic_input, stance_a_input, stance_b_input],
        outputs=[transcript_md, judge_md, score_table, verdict_box, transcript_box],
        queue=True,
    )


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4).launch()