Merge pull request #949 from Mogbeyi/emmy/week8-solution

LLM Debate Arena
Ed Donner, 2025-11-04 13:36:18 -05:00 (committed by GitHub)


@@ -0,0 +1,453 @@
from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from typing import Dict, Generator, List, Optional, Tuple

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
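
# Example .env for the default providers below (illustrative placeholders; the
# Ollama URL is the typical local default, but consult each provider's docs
# for the correct OpenAI-compatible base URL):
#   OPENAI_API_KEY=sk-...
#   GOOGLE_API_KEY=...
#   GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/
#   OLLAMA_API_KEY=ollama
#   OLLAMA_BASE_URL=http://localhost:11434/v1
#   MINIMAX_API_KEY=...
#   MINIMAX_BASE_URL=...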
# ---------------------------------------------------------------------------
# Configuration helpers
# ---------------------------------------------------------------------------
@dataclass
class AgentConfig:
"""Holds configuration required to talk to an LLM provider."""
name: str
model: str
api_key_env: str
base_url_env: Optional[str] = None
temperature: float = 0.7
supports_json: bool = True


def load_client(config: AgentConfig) -> OpenAI:
    """Create an OpenAI-compatible client for the given agent."""
    api_key = os.getenv(config.api_key_env) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            f"Missing API key for {config.name}. "
            f"Set {config.api_key_env} or OPENAI_API_KEY."
        )
    base_url = (
        os.getenv(config.base_url_env)
        if config.base_url_env
        else os.getenv("OPENAI_BASE_URL")
    )
    return OpenAI(api_key=api_key, base_url=base_url)
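
# Note: a missing provider key falls back to OPENAI_API_KEY, and an unset base
# URL falls through to the SDK's default endpoint, so a single OpenAI key is
# enough to smoke-test every agent:
#   client = load_client(DEBATER_A_CONFIG)  # hypothetical quick check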


def extract_text(response) -> str:
    """Extract text content from an OpenAI-style response object or dict."""
    choices = getattr(response, "choices", None)
    if choices is None and isinstance(response, dict):
        choices = response.get("choices")
    if not choices:
        raise RuntimeError(f"LLM response missing choices field: {response!r}")
    choice = choices[0]
    message = getattr(choice, "message", None)
    if message is None and isinstance(choice, dict):
        message = choice.get("message")
    content = None
    if message is not None:
        content = getattr(message, "content", None)
        if content is None and isinstance(message, dict):
            content = message.get("content")
    if isinstance(content, list):
        parts: List[str] = []
        for part in content:
            if isinstance(part, dict):
                if "text" in part:
                    parts.append(str(part["text"]))
                elif "output_text" in part:
                    parts.append(str(part["output_text"]))
                elif "type" in part and "content" in part:
                    parts.append(str(part["content"]))
            else:
                parts.append(str(part))
        content = "".join(parts)
    if content is None:
        text = getattr(choice, "text", None)
        if text is None and isinstance(choice, dict):
            text = choice.get("text")
        if text:
            content = text
    if content is None:
        raise RuntimeError(f"LLM response missing content/text: {response!r}")
    return str(content).strip()


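# For example, both object- and dict-shaped payloads are handled:
#   extract_text({"choices": [{"message": {"content": "Hello"}}]})  -> "Hello"
#   extract_text({"choices": [{"text": "Hello"}]})                  -> "Hello"
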
# Default configuration leverages OpenAI unless overrides are provided.
DEBATER_A_CONFIG = AgentConfig(
    name="Debater A",
    model=os.getenv("DEBATER_A_MODEL", "gpt-4o"),
    api_key_env="OPENAI_API_KEY",
    base_url_env="OPENAI_BASE_URL",
    temperature=float(os.getenv("DEBATER_A_TEMPERATURE", "0.7")),
)

DEBATER_B_CONFIG = AgentConfig(
    name="Debater B",
    model=os.getenv("DEBATER_B_MODEL", "gemini-2.0-flash"),
    api_key_env="GOOGLE_API_KEY",
    base_url_env="GEMINI_BASE_URL",
    temperature=float(os.getenv("DEBATER_B_TEMPERATURE", "0.7")),
)

JUDGE_CONFIG = AgentConfig(
    name="Judge",
    model=os.getenv("JUDGE_MODEL", "gpt-oss:20b-cloud"),
    api_key_env="OLLAMA_API_KEY",
    base_url_env="OLLAMA_BASE_URL",
    temperature=float(os.getenv("JUDGE_TEMPERATURE", "0.2")),
    supports_json=False,
)

REPORTER_CONFIG = AgentConfig(
    name="Reporter",
    model=os.getenv("REPORTER_MODEL", "MiniMax-M2"),
    api_key_env="MINIMAX_API_KEY",
    base_url_env="MINIMAX_BASE_URL",
    temperature=float(os.getenv("REPORTER_TEMPERATURE", "0.4")),
    supports_json=False,
)
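
# Any agent can be pointed at a different OpenAI-compatible endpoint purely via
# environment variables, e.g. (hypothetical values; substitute your filename):
#   JUDGE_MODEL=llama3.1 OLLAMA_BASE_URL=http://localhost:11434/v1 python app.py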

THEME = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="sky",
    neutral_hue="gray",
)

CUSTOM_CSS = """
body, .gradio-container {
    background: radial-gradient(circle at top, #0f172a 0%, #020617 60%, #020617 100%);
    color: #e2e8f0;
}
#live-debate-panel {
    background: linear-gradient(135deg, rgba(30,64,175,0.95), rgba(29,78,216,0.85));
    color: #f8fafc;
    border-radius: 16px;
    padding: 24px;
    box-shadow: 0 20px 45px rgba(15,23,42,0.35);
}
#live-debate-panel h3 {
    color: #bfdbfe;
}
.gr-button-primary {
    background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
    border: none !important;
}
.gr-button-primary:hover {
    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
}
"""

# ---------------------------------------------------------------------------
# Debate runtime classes
# ---------------------------------------------------------------------------
@dataclass
class DebateState:
    topic: str
    stance_a: str
    stance_b: str
    transcript: List[Tuple[str, str]] = field(default_factory=list)


class LLMAdapter:
    """Thin wrapper around the OpenAI SDK to simplify prompting."""

    def __init__(self, config: AgentConfig):
        self.config = config
        self.client = load_client(config)

    def complete(
        self,
        prompt: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 512,
        json_mode: bool = False,
    ) -> str:
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        params = dict(
            model=self.config.model,
            messages=messages,
            temperature=self.config.temperature,
            max_tokens=max_tokens,
        )
        if json_mode and self.config.supports_json:
            params["response_format"] = {"type": "json_object"}
        response = self.client.chat.completions.create(**params)
        return extract_text(response)
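
# Example usage of the adapter (assumes a valid key in the environment):
#   adapter = LLMAdapter(DEBATER_A_CONFIG)
#   reply = adapter.complete("Say hello in five words.", system="Be terse.")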


class Debater:
    def __init__(self, adapter: LLMAdapter, stance_label: str):
        self.adapter = adapter
        self.stance_label = stance_label

    def argue(self, topic: str) -> str:
        prompt = (
            f"You are {self.adapter.config.name}, debating the topic:\n"
            f"'{topic}'.\n\n"
            f"Present a concise argument that {self.stance_label.lower()} "
            f"the statement. Use at most 150 words. Provide clear reasoning "
            f"and, if applicable, cite plausible evidence or examples."
        )
        return self.adapter.complete(prompt, max_tokens=300)


class Judge:
    RUBRIC = [
        "Clarity of the argument",
        "Use of evidence or examples",
        "Logical coherence",
        "Persuasiveness and impact",
    ]

    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def evaluate(
        self, topic: str, argument_a: str, argument_b: str
    ) -> Dict[str, object]:
        rubric_text = "\n".join(f"- {item}" for item in self.RUBRIC)
        prompt = (
            "You are serving as an impartial debate judge.\n"
            f"Topic: {topic}\n\n"
            f"Argument from Debater A:\n{argument_a}\n\n"
            f"Argument from Debater B:\n{argument_b}\n\n"
            "Score each debater from 0-10 on the following criteria:\n"
            f"{rubric_text}\n\n"
            "Return a JSON object with this exact structure:\n"
            "{\n"
            '  "winner": "A" or "B" or "Tie",\n'
            '  "reason": "brief justification",\n'
            '  "scores": [\n'
            '    {"criterion": "...", "debater_a": 0-10, "debater_b": 0-10, "notes": "optional"}\n'
            "  ]\n"
            "}\n"
            "Ensure the JSON is valid."
        )
        raw = self.adapter.complete(prompt, max_tokens=400, json_mode=True)
        try:
            data = json.loads(raw)
            if "scores" not in data:
                raise ValueError("scores missing")
            return data
        except Exception:
            # Fallback: wrap the raw text if parsing fails.
            return {"winner": "Unknown", "reason": raw, "scores": []}
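
# The default judge config sets supports_json=False, so json_mode is ignored
# there and valid JSON must come from the prompt alone; the fallback above
# keeps the UI alive when parsing fails.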


class Reporter:
    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def summarize(
        self,
        topic: str,
        argument_a: str,
        argument_b: str,
        judge_result: Dict[str, object],
    ) -> str:
        prompt = (
            f"Summarize a single-round debate on '{topic}'.\n\n"
            f"Debater A argued:\n{argument_a}\n\n"
            f"Debater B argued:\n{argument_b}\n\n"
            f"Judge verdict: {json.dumps(judge_result, ensure_ascii=False)}\n\n"
            "Provide a short journalistic summary (max 200 words) highlighting "
            "each side's key points and the judge's decision. Use a neutral tone."
        )
        response = self.adapter.client.chat.completions.create(
            model=self.adapter.config.model,
            messages=[
                {"role": "system", "content": "You are an impartial debate reporter."},
                {"role": "user", "content": prompt},
            ],
            temperature=self.adapter.config.temperature,
            max_tokens=300,
            **(
                {"extra_body": {"reasoning_split": True}}
                if getattr(self.adapter.client, "base_url", None)
                and "minimax" in str(self.adapter.client.base_url).lower()
                else {}
            ),
        )
        return extract_text(response)
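
# The conditional extra_body above only adds {"reasoning_split": True} when the
# client points at a MiniMax endpoint; whether that flag is honoured is
# provider-specific, so treat it as an opt-in hint rather than a guarantee.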
# ---------------------------------------------------------------------------
# Debate pipeline + UI
# ---------------------------------------------------------------------------
debater_a = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
debater_b = Debater(LLMAdapter(DEBATER_B_CONFIG), stance_label="opposes")
judge = Judge(LLMAdapter(JUDGE_CONFIG))
reporter = Reporter(LLMAdapter(REPORTER_CONFIG))


def format_transcript(transcript: List[Tuple[str, str]]) -> str:
    """Return markdown-formatted transcript."""
    lines = []
    for speaker, message in transcript:
        lines.append(f"### {speaker}\n\n{message}\n")
    return "\n".join(lines)
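
# For example:
#   format_transcript([("Moderator", "Welcome!")])  -> "### Moderator\n\nWelcome!\n"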


def run_debate(
    topic: str, stance_a: str, stance_b: str
) -> Generator[Tuple[str, str, List[List[object]], str, str], None, None]:
    """Generator for Gradio to stream debate progress."""
    if not topic.strip():
        warning = "⚠️ Please enter a debate topic to get started."
        yield warning, "", [], "", ""
        return
    state = DebateState(topic=topic.strip(), stance_a=stance_a, stance_b=stance_b)
    state.transcript.append(
        ("Moderator", f"Welcome to the debate on **{state.topic}**!")
    )
    yield format_transcript(state.transcript), "Waiting for judge...", [], "", ""
    argument_a = debater_a.argue(state.topic)
    state.transcript.append((f"Debater A ({state.stance_a})", argument_a))
    yield format_transcript(state.transcript), "Collecting arguments...", [], "", ""
    argument_b = debater_b.argue(state.topic)
    state.transcript.append((f"Debater B ({state.stance_b})", argument_b))
    yield format_transcript(state.transcript), "Judge deliberating...", [], "", ""
    judge_result = judge.evaluate(state.topic, argument_a, argument_b)
    verdict_text = (
        f"Winner: {judge_result.get('winner', 'Unknown')}\nReason: "
        f"{judge_result.get('reason', 'No explanation provided.')}"
    )
    score_rows = [
        [
            entry.get("criterion", ""),
            entry.get("debater_a", ""),
            entry.get("debater_b", ""),
            entry.get("notes", ""),
        ]
        for entry in judge_result.get("scores", [])
    ]
    judge_report_md = (
        f"**Judge Verdict:** {judge_result.get('winner', 'Unknown')}\n\n"
        f"{judge_result.get('reason', '')}"
    )
    yield (
        format_transcript(state.transcript),
        judge_report_md,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )
    reporter_summary = reporter.summarize(
        state.topic, argument_a, argument_b, judge_result
    )
    final_markdown = (
        f"{judge_report_md}\n\n---\n\n"
        f"**Reporter Summary**\n\n{reporter_summary}"
    )
    yield (
        format_transcript(state.transcript),
        final_markdown,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )
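
# Each yielded 5-tuple maps positionally onto the Gradio outputs wired up
# below: (transcript_md, judge_md, score_table, verdict_box, transcript_box).
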
# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="LLM Debate Arena",
    fill_width=True,
    theme=THEME,
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(
        "# 🔁 LLM Debate Arena\n"
        "Configure two debating agents, watch their arguments in real time, and "
        "review the judge's verdict plus a reporter summary."
    )
    with gr.Row():
        topic_input = gr.Textbox(
            label="Debate Topic",
            placeholder="e.g., Should autonomous delivery robots be allowed in city centers?",
        )
    with gr.Row():
        stance_a_input = gr.Textbox(
            label="Debater A Stance",
            value="Supports the statement",
        )
        stance_b_input = gr.Textbox(
            label="Debater B Stance",
            value="Opposes the statement",
        )
    run_button = gr.Button("Start Debate", variant="primary")
    with gr.Tab("Live Debate"):
        transcript_md = gr.Markdown(
            "### Waiting for the debate to start...",
            elem_id="live-debate-panel",
        )
    with gr.Tab("Judge's Report"):
        judge_md = gr.Markdown("Judge verdict will appear here.")
        score_table = gr.Dataframe(
            headers=["Criterion", "Debater A", "Debater B", "Notes"],
            datatype=["str", "number", "number", "str"],
            interactive=False,
        )
        verdict_box = gr.Textbox(
            label="Verdict Detail",
            interactive=False,
        )
        transcript_box = gr.Textbox(
            label="Full Transcript (for copying)",
            interactive=False,
            lines=10,
        )
    run_button.click(
        fn=run_debate,
        inputs=[topic_input, stance_a_input, stance_b_input],
        outputs=[transcript_md, judge_md, score_table, verdict_box, transcript_box],
        queue=True,
    )
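
# queue=True lets Gradio stream each yield from the run_debate generator to the
# UI as it arrives instead of waiting for the final result.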

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4).launch()