Merge pull request #949 from Mogbeyi/emmy/week8-solution
LLM Debate Arena
week8/community_contributions/emmy/llm_battle.py (new file, 453 lines)
@@ -0,0 +1,453 @@
from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from typing import Dict, Generator, List, Optional, Tuple

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()


# ---------------------------------------------------------------------------
# Configuration helpers
# ---------------------------------------------------------------------------


@dataclass
class AgentConfig:
    """Holds configuration required to talk to an LLM provider."""

    name: str
    model: str
    api_key_env: str
    base_url_env: Optional[str] = None
    temperature: float = 0.7
    supports_json: bool = True


def load_client(config: AgentConfig) -> OpenAI:
    """Create an OpenAI-compatible client for the given agent."""
    api_key = os.getenv(config.api_key_env) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            f"Missing API key for {config.name}. "
            f"Set {config.api_key_env} or OPENAI_API_KEY."
        )

    base_url = (
        os.getenv(config.base_url_env)
        if config.base_url_env
        else os.getenv("OPENAI_BASE_URL")
    )

    return OpenAI(api_key=api_key, base_url=base_url)
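

# Usage sketch (illustrative only, not wired into the app): pointing an agent at
# a local Ollama server just means setting the env vars its config names, since
# load_client falls back to OPENAI_API_KEY / OPENAI_BASE_URL when they are unset.
# The model name and URL below are assumptions, not project defaults:
#
#   os.environ.setdefault("OLLAMA_API_KEY", "ollama")
#   os.environ.setdefault("OLLAMA_BASE_URL", "http://localhost:11434/v1")
#   local_agent = AgentConfig(
#       name="Local Agent",
#       model="llama3.1",
#       api_key_env="OLLAMA_API_KEY",
#       base_url_env="OLLAMA_BASE_URL",
#   )
#   client = load_client(local_agent)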


def extract_text(response) -> str:
    """Extract text content from an OpenAI-style response object or dict."""
    choices = getattr(response, "choices", None)
    if choices is None and isinstance(response, dict):
        choices = response.get("choices")
    if not choices:
        raise RuntimeError(f"LLM response missing choices field: {response!r}")

    choice = choices[0]
    message = getattr(choice, "message", None)
    if message is None and isinstance(choice, dict):
        message = choice.get("message")

    content = None
    if message is not None:
        content = getattr(message, "content", None)
        if content is None and isinstance(message, dict):
            content = message.get("content")

    # Some providers return content as a list of typed parts; join them.
    if isinstance(content, list):
        parts: List[str] = []
        for part in content:
            if isinstance(part, dict):
                if "text" in part:
                    parts.append(str(part["text"]))
                elif "output_text" in part:
                    parts.append(str(part["output_text"]))
                elif "type" in part and "content" in part:
                    parts.append(str(part["content"]))
            else:
                parts.append(str(part))
        content = "".join(parts)

    # Legacy completions-style responses expose `text` on the choice instead.
    if content is None:
        text = getattr(choice, "text", None)
        if text is None and isinstance(choice, dict):
            text = choice.get("text")
        if text:
            content = text

    if content is None:
        raise RuntimeError(f"LLM response missing content/text: {response!r}")

    return str(content).strip()
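

# Quick illustration (assumed response shape): extract_text accepts plain dicts
# as well as SDK objects, and joins list-style content parts.
#
#   fake = {"choices": [{"message": {"content": [{"type": "text", "text": "Hi"}]}}]}
#   assert extract_text(fake) == "Hi"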


# Default configuration leverages OpenAI unless overrides are provided.
DEBATER_A_CONFIG = AgentConfig(
    name="Debater A",
    model=os.getenv("DEBATER_A_MODEL", "gpt-4o"),
    api_key_env="OPENAI_API_KEY",
    base_url_env="OPENAI_BASE_URL",
    temperature=float(os.getenv("DEBATER_A_TEMPERATURE", "0.7")),
)

DEBATER_B_CONFIG = AgentConfig(
    name="Debater B",
    model=os.getenv("DEBATER_B_MODEL", "gemini-2.0-flash"),
    api_key_env="GOOGLE_API_KEY",
    base_url_env="GEMINI_BASE_URL",
    temperature=float(os.getenv("DEBATER_B_TEMPERATURE", "0.7")),
)

JUDGE_CONFIG = AgentConfig(
    name="Judge",
    model=os.getenv("JUDGE_MODEL", "gpt-oss:20b-cloud"),
    api_key_env="OLLAMA_API_KEY",
    base_url_env="OLLAMA_BASE_URL",
    temperature=float(os.getenv("JUDGE_TEMPERATURE", "0.2")),
    supports_json=False,
)

REPORTER_CONFIG = AgentConfig(
    name="Reporter",
    model=os.getenv("REPORTER_MODEL", "MiniMax-M2"),
    api_key_env="MINIMAX_API_KEY",
    base_url_env="MINIMAX_BASE_URL",
    temperature=float(os.getenv("REPORTER_TEMPERATURE", "0.4")),
    supports_json=False,
)
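
# Every agent's model and temperature can be overridden per environment, e.g.
# (example values, shell syntax):
#
#   export DEBATER_A_MODEL=gpt-4o-mini
#   export JUDGE_TEMPERATURE=0.0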

THEME = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="sky",
    neutral_hue="gray",
)

CUSTOM_CSS = """
body, .gradio-container {
    background: radial-gradient(circle at top, #0f172a 0%, #020617 60%, #020617 100%);
    color: #e2e8f0;
}
#live-debate-panel {
    background: linear-gradient(135deg, rgba(30,64,175,0.95), rgba(29,78,216,0.85));
    color: #f8fafc;
    border-radius: 16px;
    padding: 24px;
    box-shadow: 0 20px 45px rgba(15,23,42,0.35);
}
#live-debate-panel h3 {
    color: #bfdbfe;
}
.gr-button-primary {
    background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
    border: none !important;
}
.gr-button-primary:hover {
    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
}
"""


# ---------------------------------------------------------------------------
# Debate runtime classes
# ---------------------------------------------------------------------------


@dataclass
class DebateState:
    topic: str
    stance_a: str
    stance_b: str
    transcript: List[Tuple[str, str]] = field(default_factory=list)


class LLMAdapter:
    """Thin wrapper around the OpenAI SDK to simplify prompting."""

    def __init__(self, config: AgentConfig):
        self.config = config
        self.client = load_client(config)

    def complete(
        self,
        prompt: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 512,
        json_mode: bool = False,
    ) -> str:
        messages: List[Dict[str, str]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        params = dict(
            model=self.config.model,
            messages=messages,
            temperature=self.config.temperature,
            max_tokens=max_tokens,
        )
        # Only request JSON mode from providers known to support it.
        if json_mode and self.config.supports_json:
            params["response_format"] = {"type": "json_object"}

        response = self.client.chat.completions.create(**params)
        return extract_text(response)
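

# Minimal adapter call, for orientation (sketch; prompt text is illustrative):
#
#   adapter = LLMAdapter(DEBATER_A_CONFIG)
#   reply = adapter.complete("Say hello in one sentence.", system="Be brief.")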


class Debater:
    def __init__(self, adapter: LLMAdapter, stance_label: str):
        self.adapter = adapter
        self.stance_label = stance_label

    def argue(self, topic: str) -> str:
        prompt = (
            f"You are {self.adapter.config.name}, debating the topic:\n"
            f"'{topic}'.\n\n"
            f"Present a concise argument that {self.stance_label.lower()} "
            f"the statement. Use at most 150 words. Provide clear reasoning "
            f"and, if applicable, cite plausible evidence or examples."
        )
        return self.adapter.complete(prompt, max_tokens=300)


class Judge:
    RUBRIC = [
        "Clarity of the argument",
        "Use of evidence or examples",
        "Logical coherence",
        "Persuasiveness and impact",
    ]

    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def evaluate(self, topic: str, argument_a: str, argument_b: str) -> Dict[str, object]:
        rubric_text = "\n".join(f"- {item}" for item in self.RUBRIC)
        prompt = (
            "You are serving as an impartial debate judge.\n"
            f"Topic: {topic}\n\n"
            f"Argument from Debater A:\n{argument_a}\n\n"
            f"Argument from Debater B:\n{argument_b}\n\n"
            "Score each debater from 0-10 on the following criteria:\n"
            f"{rubric_text}\n\n"
            "Return a JSON object with this exact structure:\n"
            "{\n"
            '  "winner": "A" or "B" or "Tie",\n'
            '  "reason": "brief justification",\n'
            '  "scores": [\n'
            '    {"criterion": "...", "debater_a": 0-10, "debater_b": 0-10, "notes": "optional"}\n'
            "  ]\n"
            "}\n"
            "Ensure the JSON is valid."
        )
        raw = self.adapter.complete(prompt, max_tokens=400, json_mode=True)
        # The judge model may not honour JSON mode (supports_json=False), so
        # strip any markdown code fences before attempting to parse.
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.startswith("json"):
                cleaned = cleaned[4:]
        try:
            data = json.loads(cleaned)
            if "scores" not in data:
                raise ValueError("scores missing")
            return data
        except Exception:
            # Fallback: wrap the raw text if parsing fails.
            return {"winner": "Unknown", "reason": raw, "scores": []}
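

# A well-formed judge reply therefore parses to something like (values are
# illustrative):
#
#   {"winner": "A", "reason": "Stronger evidence.",
#    "scores": [{"criterion": "Clarity of the argument",
#                "debater_a": 8, "debater_b": 6, "notes": ""}]}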


class Reporter:
    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def summarize(
        self,
        topic: str,
        argument_a: str,
        argument_b: str,
        judge_result: Dict[str, object],
    ) -> str:
        prompt = (
            f"Summarize a single-round debate on '{topic}'.\n\n"
            f"Debater A argued:\n{argument_a}\n\n"
            f"Debater B argued:\n{argument_b}\n\n"
            f"Judge verdict: {json.dumps(judge_result, ensure_ascii=False)}\n\n"
            "Provide a short journalistic summary (max 200 words) highlighting "
            "each side's key points and the judge's decision. Use a neutral tone."
        )
        response = self.adapter.client.chat.completions.create(
            model=self.adapter.config.model,
            messages=[
                {"role": "system", "content": "You are an impartial debate reporter."},
                {"role": "user", "content": prompt},
            ],
            temperature=self.adapter.config.temperature,
            max_tokens=300,
            # Pass the provider-specific reasoning_split flag only when the
            # base URL looks like a MiniMax endpoint.
            **(
                {"extra_body": {"reasoning_split": True}}
                if getattr(self.adapter.client, "base_url", None)
                and "minimax" in str(self.adapter.client.base_url).lower()
                else {}
            ),
        )
        return extract_text(response)


# ---------------------------------------------------------------------------
# Debate pipeline + UI
# ---------------------------------------------------------------------------


debater_a = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
debater_b = Debater(LLMAdapter(DEBATER_B_CONFIG), stance_label="opposes")
judge = Judge(LLMAdapter(JUDGE_CONFIG))
reporter = Reporter(LLMAdapter(REPORTER_CONFIG))


def format_transcript(transcript: List[Tuple[str, str]]) -> str:
    """Return a markdown-formatted transcript."""
    lines = []
    for speaker, message in transcript:
        lines.append(f"### {speaker}\n\n{message}\n")
    return "\n".join(lines)


def run_debate(
    topic: str, stance_a: str, stance_b: str
) -> Generator[Tuple[str, str, List[List[object]], str, str], None, None]:
    """Generator for Gradio to stream debate progress."""
    if not topic.strip():
        warning = "⚠️ Please enter a debate topic to get started."
        yield warning, "", [], "", ""
        return

    state = DebateState(topic=topic.strip(), stance_a=stance_a, stance_b=stance_b)

    state.transcript.append(
        ("Moderator", f"Welcome to the debate on **{state.topic}**!")
    )
    yield format_transcript(state.transcript), "Debaters preparing arguments...", [], "", ""

    argument_a = debater_a.argue(state.topic)
    state.transcript.append((f"Debater A ({state.stance_a})", argument_a))
    yield format_transcript(state.transcript), "Collecting arguments...", [], "", ""

    argument_b = debater_b.argue(state.topic)
    state.transcript.append((f"Debater B ({state.stance_b})", argument_b))
    yield format_transcript(state.transcript), "Judge deliberating...", [], "", ""

    judge_result = judge.evaluate(state.topic, argument_a, argument_b)
    verdict_text = (
        f"Winner: {judge_result.get('winner', 'Unknown')}\nReason: "
        f"{judge_result.get('reason', 'No explanation provided.')}"
    )
    score_rows = [
        [
            entry.get("criterion", ""),
            entry.get("debater_a", ""),
            entry.get("debater_b", ""),
            entry.get("notes", ""),
        ]
        for entry in judge_result.get("scores", [])
    ]
    judge_report_md = (
        f"**Judge Verdict:** {judge_result.get('winner', 'Unknown')}\n\n"
        f"{judge_result.get('reason', '')}"
    )
    yield (
        format_transcript(state.transcript),
        judge_report_md,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )

    reporter_summary = reporter.summarize(
        state.topic, argument_a, argument_b, judge_result
    )

    final_markdown = (
        f"{judge_report_md}\n\n---\n\n"
        f"**Reporter Summary**\n\n{reporter_summary}"
    )
    yield (
        format_transcript(state.transcript),
        final_markdown,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )
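

# run_debate is a plain generator, so it can be smoke-tested without the UI
# (sketch; topic and stances are illustrative):
#
#   for transcript, report, rows, verdict, copy_text in run_debate(
#       "Remote work should be the default", "Supports", "Opposes"
#   ):
#       print(report or "streaming...")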


# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------


with gr.Blocks(
    title="LLM Debate Arena",
    fill_width=True,
    theme=THEME,
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(
        "# 🔁 LLM Debate Arena\n"
        "Configure two debating agents, watch their arguments in real time, and "
        "review the judge's verdict plus a reporter summary."
    )

    with gr.Row():
        topic_input = gr.Textbox(
            label="Debate Topic",
            placeholder="e.g., Should autonomous delivery robots be allowed in city centers?",
        )
    with gr.Row():
        stance_a_input = gr.Textbox(
            label="Debater A Stance",
            value="Supports the statement",
        )
        stance_b_input = gr.Textbox(
            label="Debater B Stance",
            value="Opposes the statement",
        )

    run_button = gr.Button("Start Debate", variant="primary")

    with gr.Tab("Live Debate"):
        transcript_md = gr.Markdown(
            "### Waiting for the debate to start...",
            elem_id="live-debate-panel",
        )

    with gr.Tab("Judge's Report"):
        judge_md = gr.Markdown("Judge verdict will appear here.")
        score_table = gr.Dataframe(
            headers=["Criterion", "Debater A", "Debater B", "Notes"],
            datatype=["str", "number", "number", "str"],
            interactive=False,
        )
        verdict_box = gr.Textbox(
            label="Verdict Detail",
            interactive=False,
        )
        transcript_box = gr.Textbox(
            label="Full Transcript (for copying)",
            interactive=False,
            lines=10,
        )

    run_button.click(
        fn=run_debate,
        inputs=[topic_input, stance_a_input, stance_b_input],
        outputs=[transcript_md, judge_md, score_table, verdict_box, transcript_box],
        queue=True,
    )


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4).launch()