diff --git a/week8/community_contributions/emmy/llm_battle.py b/week8/community_contributions/emmy/llm_battle.py
new file mode 100644
index 0000000..b419d72
--- /dev/null
+++ b/week8/community_contributions/emmy/llm_battle.py
@@ -0,0 +1,453 @@
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass, field
+from typing import Dict, Generator, List, Optional, Tuple
+
+import gradio as gr
+from dotenv import load_dotenv
+from openai import OpenAI
+
+load_dotenv()
+
+
+# ---------------------------------------------------------------------------
+# Configuration helpers
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class AgentConfig:
+    """Holds the configuration required to talk to an LLM provider."""
+
+    name: str
+    model: str
+    api_key_env: str
+    base_url_env: Optional[str] = None
+    temperature: float = 0.7
+    supports_json: bool = True
+
+
+def load_client(config: AgentConfig) -> OpenAI:
+    """Create an OpenAI-compatible client for the given agent."""
+    api_key = os.getenv(config.api_key_env) or os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        raise RuntimeError(
+            f"Missing API key for {config.name}. "
+            f"Set {config.api_key_env} or OPENAI_API_KEY."
+        )
+
+    base_url = (
+        os.getenv(config.base_url_env)
+        if config.base_url_env
+        else os.getenv("OPENAI_BASE_URL")
+    )
+
+    return OpenAI(api_key=api_key, base_url=base_url)
+
+
+def extract_text(response) -> str:
+    """Extract text content from an OpenAI-style response object or dict."""
+
+    choices = getattr(response, "choices", None)
+    if choices is None and isinstance(response, dict):
+        choices = response.get("choices")
+    if not choices:
+        raise RuntimeError(f"LLM response missing choices field: {response!r}")
+
+    choice = choices[0]
+    message = getattr(choice, "message", None)
+    if message is None and isinstance(choice, dict):
+        message = choice.get("message")
+
+    content = None
+    if message is not None:
+        content = getattr(message, "content", None)
+        if content is None and isinstance(message, dict):
+            content = message.get("content")
+
+    # Some providers return content as a list of typed parts rather than a
+    # plain string; flatten whatever text can be recovered.
+    if isinstance(content, list):
+        parts: List[str] = []
+        for part in content:
+            if isinstance(part, dict):
+                if "text" in part:
+                    parts.append(str(part["text"]))
+                elif "output_text" in part:
+                    parts.append(str(part["output_text"]))
+                elif "type" in part and "content" in part:
+                    parts.append(str(part["content"]))
+            else:
+                parts.append(str(part))
+        content = "".join(parts)
+
+    if content is None:
+        text = getattr(choice, "text", None)
+        if text is None and isinstance(choice, dict):
+            text = choice.get("text")
+        if text:
+            content = text
+
+    if content is None:
+        raise RuntimeError(f"LLM response missing content/text: {response!r}")
+
+    return str(content).strip()
+
+
+# Default configurations target OpenAI, Gemini, Ollama, and MiniMax through
+# their OpenAI-compatible APIs; every model, key, and base URL can be
+# overridden via environment variables.
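+#
+# As an illustration only (every value here is a placeholder, not a verified
+# endpoint), a .env along these lines would exercise all four agents:
+#
+#   OPENAI_API_KEY=sk-...
+#   GOOGLE_API_KEY=...
+#   GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/
+#   OLLAMA_API_KEY=...
+#   OLLAMA_BASE_URL=http://localhost:11434/v1
+#   MINIMAX_API_KEY=...
+#   MINIMAX_BASE_URL=...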
+DEBATER_A_CONFIG = AgentConfig(
+    name="Debater A",
+    model=os.getenv("DEBATER_A_MODEL", "gpt-4o"),
+    api_key_env="OPENAI_API_KEY",
+    base_url_env="OPENAI_BASE_URL",
+    temperature=float(os.getenv("DEBATER_A_TEMPERATURE", "0.7")),
+)
+
+DEBATER_B_CONFIG = AgentConfig(
+    name="Debater B",
+    model=os.getenv("DEBATER_B_MODEL", "gemini-2.0-flash"),
+    api_key_env="GOOGLE_API_KEY",
+    base_url_env="GEMINI_BASE_URL",
+    temperature=float(os.getenv("DEBATER_B_TEMPERATURE", "0.7")),
+)
+
+JUDGE_CONFIG = AgentConfig(
+    name="Judge",
+    model=os.getenv("JUDGE_MODEL", "gpt-oss:20b-cloud"),
+    api_key_env="OLLAMA_API_KEY",
+    base_url_env="OLLAMA_BASE_URL",
+    temperature=float(os.getenv("JUDGE_TEMPERATURE", "0.2")),
+    supports_json=False,
+)
+
+REPORTER_CONFIG = AgentConfig(
+    name="Reporter",
+    model=os.getenv("REPORTER_MODEL", "MiniMax-M2"),
+    api_key_env="MINIMAX_API_KEY",
+    base_url_env="MINIMAX_BASE_URL",
+    temperature=float(os.getenv("REPORTER_TEMPERATURE", "0.4")),
+    supports_json=False,
+)
+
+THEME = gr.themes.Default(
+    primary_hue="blue",
+    secondary_hue="sky",
+    neutral_hue="gray",
+)
+
+CUSTOM_CSS = """
+body, .gradio-container {
+    background: radial-gradient(circle at top, #0f172a 0%, #020617 60%, #020617 100%);
+    color: #e2e8f0;
+}
+#live-debate-panel {
+    background: linear-gradient(135deg, rgba(30,64,175,0.95), rgba(29,78,216,0.85));
+    color: #f8fafc;
+    border-radius: 16px;
+    padding: 24px;
+    box-shadow: 0 20px 45px rgba(15,23,42,0.35);
+}
+#live-debate-panel h3 {
+    color: #bfdbfe;
+}
+.gr-button-primary {
+    background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
+    border: none !important;
+}
+.gr-button-primary:hover {
+    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
+}
+"""
+
+# ---------------------------------------------------------------------------
+# Debate runtime classes
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class DebateState:
+    topic: str
+    stance_a: str
+    stance_b: str
+    transcript: List[Tuple[str, str]] = field(default_factory=list)
+
+
+class LLMAdapter:
+    """Thin wrapper around the OpenAI SDK to simplify prompting."""
+
+    def __init__(self, config: AgentConfig):
+        self.config = config
+        self.client = load_client(config)
+
+    def complete(
+        self,
+        prompt: str,
+        *,
+        system: Optional[str] = None,
+        max_tokens: int = 512,
+        json_mode: bool = False,
+    ) -> str:
+        messages = []
+        if system:
+            messages.append({"role": "system", "content": system})
+        messages.append({"role": "user", "content": prompt})
+
+        params = dict(
+            model=self.config.model,
+            messages=messages,
+            temperature=self.config.temperature,
+            max_tokens=max_tokens,
+        )
+        if json_mode and self.config.supports_json:
+            params["response_format"] = {"type": "json_object"}
+
+        response = self.client.chat.completions.create(**params)
+        return extract_text(response)
+
+
+class Debater:
+    def __init__(self, adapter: LLMAdapter, stance_label: str):
+        self.adapter = adapter
+        self.stance_label = stance_label
+
+    def argue(self, topic: str) -> str:
+        prompt = (
+            f"You are {self.adapter.config.name}, debating the topic:\n"
+            f"'{topic}'.\n\n"
+            f"Present a concise argument that {self.stance_label.lower()} "
+            f"the statement. Use at most 150 words. Provide clear reasoning "
+            f"and, if applicable, cite plausible evidence or examples."
+        )
+        return self.adapter.complete(prompt, max_tokens=300)
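+
+
+# Sketch of standalone use, assuming the environment above resolves to live
+# endpoints (the topic string is hypothetical):
+#
+#   pro = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
+#   print(pro.argue("Remote work should be the default"))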
+
+
+class Judge:
+    RUBRIC = [
+        "Clarity of the argument",
+        "Use of evidence or examples",
+        "Logical coherence",
+        "Persuasiveness and impact",
+    ]
+
+    def __init__(self, adapter: LLMAdapter):
+        self.adapter = adapter
+
+    def evaluate(self, topic: str, argument_a: str, argument_b: str) -> Dict[str, object]:
+        rubric_text = "\n".join(f"- {item}" for item in self.RUBRIC)
+        prompt = (
+            "You are serving as an impartial debate judge.\n"
+            f"Topic: {topic}\n\n"
+            f"Argument from Debater A:\n{argument_a}\n\n"
+            f"Argument from Debater B:\n{argument_b}\n\n"
+            "Score each debater from 0-10 on the following criteria:\n"
+            f"{rubric_text}\n\n"
+            "Return a JSON object with this exact structure:\n"
+            '{\n'
+            '  "winner": "A" or "B" or "Tie",\n'
+            '  "reason": "brief justification",\n'
+            '  "scores": [\n'
+            '    {"criterion": "...", "debater_a": 0-10, "debater_b": 0-10, "notes": "optional"}\n'
+            "  ]\n"
+            "}\n"
+            "Ensure the JSON is valid."
+        )
+        raw = self.adapter.complete(prompt, max_tokens=400, json_mode=True)
+        try:
+            # Models without native JSON mode often wrap the object in
+            # ```json fences; strip them before parsing.
+            cleaned = raw.strip().removeprefix("```json").removeprefix("```")
+            cleaned = cleaned.removesuffix("```").strip()
+            data = json.loads(cleaned)
+            if "scores" not in data:
+                raise ValueError("scores missing")
+            return data
+        except Exception:
+            # Fallback: surface the raw text if parsing fails.
+            return {"winner": "Unknown", "reason": raw, "scores": []}
+
+
+class Reporter:
+    def __init__(self, adapter: LLMAdapter):
+        self.adapter = adapter
+
+    def summarize(
+        self,
+        topic: str,
+        argument_a: str,
+        argument_b: str,
+        judge_result: Dict[str, object],
+    ) -> str:
+        prompt = (
+            f"Summarize a single-round debate on '{topic}'.\n\n"
+            f"Debater A argued:\n{argument_a}\n\n"
+            f"Debater B argued:\n{argument_b}\n\n"
+            f"Judge verdict: {json.dumps(judge_result, ensure_ascii=False)}\n\n"
+            "Provide a short journalistic summary (max 200 words) highlighting "
+            "each side's key points and the judge's decision. Use a neutral tone."
+        )
+        response = self.adapter.client.chat.completions.create(
+            model=self.adapter.config.model,
+            messages=[
+                {"role": "system", "content": "You are an impartial debate reporter."},
+                {"role": "user", "content": prompt},
+            ],
+            temperature=self.adapter.config.temperature,
+            max_tokens=300,
+            # Pass a MiniMax-specific extra_body flag only when the client
+            # actually points at a MiniMax base URL.
+            **(
+                {"extra_body": {"reasoning_split": True}}
+                if getattr(self.adapter.client, "base_url", None)
+                and "minimax" in str(self.adapter.client.base_url).lower()
+                else {}
+            ),
+        )
+        return extract_text(response)
+
+
+# ---------------------------------------------------------------------------
+# Debate pipeline + UI
+# ---------------------------------------------------------------------------
+
+
+debater_a = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
+debater_b = Debater(LLMAdapter(DEBATER_B_CONFIG), stance_label="opposes")
+judge = Judge(LLMAdapter(JUDGE_CONFIG))
+reporter = Reporter(LLMAdapter(REPORTER_CONFIG))
+
+
+def format_transcript(transcript: List[Tuple[str, str]]) -> str:
+    """Return a markdown-formatted transcript."""
+    lines = []
+    for speaker, message in transcript:
+        lines.append(f"### {speaker}\n\n{message}\n")
+    return "\n".join(lines)
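+
+
+# Note: run_debate below is a generator. With Gradio's queue enabled, every
+# yield re-renders all five outputs, which is what makes the debate appear
+# live in the UI.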
+def run_debate(
+    topic: str, stance_a: str, stance_b: str
+) -> Generator[Tuple[str, str, List[List[object]], str, str], None, None]:
+    """Generator for Gradio to stream debate progress."""
+    if not topic.strip():
+        warning = "⚠️ Please enter a debate topic to get started."
+        yield warning, "", [], "", ""
+        return
+
+    state = DebateState(topic=topic.strip(), stance_a=stance_a, stance_b=stance_b)
+
+    state.transcript.append(
+        ("Moderator", f"Welcome to the debate on **{state.topic}**!")
+    )
+    yield format_transcript(state.transcript), "Waiting for judge...", [], "", ""
+
+    argument_a = debater_a.argue(state.topic)
+    state.transcript.append((f"Debater A ({state.stance_a})", argument_a))
+    yield format_transcript(state.transcript), "Collecting arguments...", [], "", ""
+
+    argument_b = debater_b.argue(state.topic)
+    state.transcript.append((f"Debater B ({state.stance_b})", argument_b))
+    yield format_transcript(state.transcript), "Judge deliberating...", [], "", ""
+
+    judge_result = judge.evaluate(state.topic, argument_a, argument_b)
+    verdict_text = (
+        f"Winner: {judge_result.get('winner', 'Unknown')}\nReason: "
+        f"{judge_result.get('reason', 'No explanation provided.')}"
+    )
+    score_rows = [
+        [
+            entry.get("criterion", ""),
+            entry.get("debater_a", ""),
+            entry.get("debater_b", ""),
+            entry.get("notes", ""),
+        ]
+        for entry in judge_result.get("scores", [])
+    ]
+    judge_report_md = (
+        f"**Judge Verdict:** {judge_result.get('winner', 'Unknown')}\n\n"
+        f"{judge_result.get('reason', '')}"
+    )
+    yield (
+        format_transcript(state.transcript),
+        judge_report_md,
+        score_rows,
+        verdict_text,
+        format_transcript(state.transcript),
+    )
+
+    reporter_summary = reporter.summarize(
+        state.topic, argument_a, argument_b, judge_result
+    )
+
+    final_markdown = (
+        f"{judge_report_md}\n\n---\n\n"
+        f"**Reporter Summary**\n\n{reporter_summary}"
+    )
+    yield (
+        format_transcript(state.transcript),
+        final_markdown,
+        score_rows,
+        verdict_text,
+        format_transcript(state.transcript),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Gradio Interface
+# ---------------------------------------------------------------------------
+
+
+with gr.Blocks(
+    title="LLM Debate Arena",
+    fill_width=True,
+    theme=THEME,
+    css=CUSTOM_CSS,
+) as demo:
+    gr.Markdown(
+        "# 🔁 LLM Debate Arena\n"
+        "Configure two debating agents, watch their arguments in real time, and "
+        "review the judge's verdict plus a reporter summary."
+    )
+
+    with gr.Row():
+        topic_input = gr.Textbox(
+            label="Debate Topic",
+            placeholder="e.g., Should autonomous delivery robots be allowed in city centers?",
+        )
+    with gr.Row():
+        stance_a_input = gr.Textbox(
+            label="Debater A Stance",
+            value="Supports the statement",
+        )
+        stance_b_input = gr.Textbox(
+            label="Debater B Stance",
+            value="Opposes the statement",
+        )
+
+    run_button = gr.Button("Start Debate", variant="primary")
+
+    with gr.Tab("Live Debate"):
+        transcript_md = gr.Markdown(
+            "### Waiting for the debate to start...",
+            elem_id="live-debate-panel",
+        )
+
+    with gr.Tab("Judge's Report"):
+        judge_md = gr.Markdown("Judge verdict will appear here.")
+        score_table = gr.Dataframe(
+            headers=["Criterion", "Debater A", "Debater B", "Notes"],
+            datatype=["str", "number", "number", "str"],
+            interactive=False,
+        )
+        verdict_box = gr.Textbox(
+            label="Verdict Detail",
+            interactive=False,
+        )
+        transcript_box = gr.Textbox(
+            label="Full Transcript (for copying)",
+            interactive=False,
+            lines=10,
+        )
+
+    run_button.click(
+        fn=run_debate,
+        inputs=[topic_input, stance_a_input, stance_b_input],
+        outputs=[transcript_md, judge_md, score_table, verdict_box, transcript_box],
+        queue=True,
+    )
+
+if __name__ == "__main__":
+    demo.queue(default_concurrency_limit=4).launch()
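+
+# To try it locally (assuming the keys referenced above are set in .env):
+#
+#   python llm_battle.py
+#
+# Gradio prints a local URL (http://127.0.0.1:7860 by default).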