from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from typing import Dict, Generator, List, Optional, Tuple

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
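
# A minimal example .env for the default agents configured below.
# All values are illustrative; set only what your provider setup needs:
#   OPENAI_API_KEY=sk-...
#   GOOGLE_API_KEY=...
#   GEMINI_BASE_URL=...
#   OLLAMA_API_KEY=...
#   OLLAMA_BASE_URL=...
#   MINIMAX_API_KEY=...
#   MINIMAX_BASE_URL=...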


# ---------------------------------------------------------------------------
# Configuration helpers
# ---------------------------------------------------------------------------


@dataclass
class AgentConfig:
    """Holds configuration required to talk to an LLM provider."""

    name: str
    model: str
    api_key_env: str
    base_url_env: Optional[str] = None
    temperature: float = 0.7
    supports_json: bool = True


def load_client(config: AgentConfig) -> OpenAI:
    """Create an OpenAI-compatible client for the given agent."""
    api_key = os.getenv(config.api_key_env) or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            f"Missing API key for {config.name}. "
            f"Set {config.api_key_env} or OPENAI_API_KEY."
        )

    base_url = (
        os.getenv(config.base_url_env)
        if config.base_url_env
        else os.getenv("OPENAI_BASE_URL")
    )

    return OpenAI(api_key=api_key, base_url=base_url)
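
# Note: when base_url resolves to None, the OpenAI SDK falls back to its
# default endpoint (https://api.openai.com/v1).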


def extract_text(response) -> str:
    """Extract text content from an OpenAI-style response object or dict."""

    choices = getattr(response, "choices", None)
    if choices is None and isinstance(response, dict):
        choices = response.get("choices")
    if not choices:
        raise RuntimeError(f"LLM response missing choices field: {response!r}")

    choice = choices[0]
    message = getattr(choice, "message", None)
    if message is None and isinstance(choice, dict):
        message = choice.get("message")

    content = None
    if message is not None:
        content = getattr(message, "content", None)
        if content is None and isinstance(message, dict):
            content = message.get("content")

    # Some providers return content as a list of typed parts.
    if isinstance(content, list):
        parts: List[str] = []
        for part in content:
            if isinstance(part, dict):
                if "text" in part:
                    parts.append(str(part["text"]))
                elif "output_text" in part:
                    parts.append(str(part["output_text"]))
                elif "type" in part and "content" in part:
                    parts.append(str(part["content"]))
            else:
                parts.append(str(part))
        content = "".join(parts)

    # Legacy completion endpoints expose text on the choice itself.
    if content is None:
        text = getattr(choice, "text", None)
        if text is None and isinstance(choice, dict):
            text = choice.get("text")
        if text:
            content = text

    if content is None:
        raise RuntimeError(f"LLM response missing content/text: {response!r}")

    return str(content).strip()
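
# Illustrative check of the dict fallback (not executed at import time):
#   extract_text({"choices": [{"message": {"content": "hi"}}]}) == "hi"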


# Default configuration leverages OpenAI unless overrides are provided.
DEBATER_A_CONFIG = AgentConfig(
    name="Debater A",
    model=os.getenv("DEBATER_A_MODEL", "gpt-4o"),
    api_key_env="OPENAI_API_KEY",
    base_url_env="OPENAI_BASE_URL",
    temperature=float(os.getenv("DEBATER_A_TEMPERATURE", "0.7")),
)

DEBATER_B_CONFIG = AgentConfig(
    name="Debater B",
    model=os.getenv("DEBATER_B_MODEL", "gemini-2.0-flash"),
    api_key_env="GOOGLE_API_KEY",
    base_url_env="GEMINI_BASE_URL",
    temperature=float(os.getenv("DEBATER_B_TEMPERATURE", "0.7")),
)

JUDGE_CONFIG = AgentConfig(
    name="Judge",
    model=os.getenv("JUDGE_MODEL", "gpt-oss:20b-cloud"),
    api_key_env="OLLAMA_API_KEY",
    base_url_env="OLLAMA_BASE_URL",
    temperature=float(os.getenv("JUDGE_TEMPERATURE", "0.2")),
    supports_json=False,
)

REPORTER_CONFIG = AgentConfig(
    name="Reporter",
    model=os.getenv("REPORTER_MODEL", "MiniMax-M2"),
    api_key_env="MINIMAX_API_KEY",
    base_url_env="MINIMAX_BASE_URL",
    temperature=float(os.getenv("REPORTER_TEMPERATURE", "0.4")),
    supports_json=False,
)

THEME = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="sky",
    neutral_hue="gray",
)

CUSTOM_CSS = """
body, .gradio-container {
    background: radial-gradient(circle at top, #0f172a 0%, #020617 60%, #020617 100%);
    color: #e2e8f0;
}
#live-debate-panel {
    background: linear-gradient(135deg, rgba(30,64,175,0.95), rgba(29,78,216,0.85));
    color: #f8fafc;
    border-radius: 16px;
    padding: 24px;
    box-shadow: 0 20px 45px rgba(15,23,42,0.35);
}
#live-debate-panel h3 {
    color: #bfdbfe;
}
.gr-button-primary {
    background: linear-gradient(135deg, #1d4ed8, #2563eb) !important;
    border: none !important;
}
.gr-button-primary:hover {
    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
}
"""


# ---------------------------------------------------------------------------
# Debate runtime classes
# ---------------------------------------------------------------------------


@dataclass
class DebateState:
    """Mutable record of a single debate round."""

    topic: str
    stance_a: str
    stance_b: str
    transcript: List[Tuple[str, str]] = field(default_factory=list)


class LLMAdapter:
    """Thin wrapper around the OpenAI SDK to simplify prompting."""

    def __init__(self, config: AgentConfig):
        self.config = config
        self.client = load_client(config)

    def complete(
        self,
        prompt: str,
        *,
        system: Optional[str] = None,
        max_tokens: int = 512,
        json_mode: bool = False,
    ) -> str:
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        params = dict(
            model=self.config.model,
            messages=messages,
            temperature=self.config.temperature,
            max_tokens=max_tokens,
        )
        # Only request JSON output when the provider is known to support it.
        if json_mode and self.config.supports_json:
            params["response_format"] = {"type": "json_object"}

        response = self.client.chat.completions.create(**params)
        return extract_text(response)
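
# Illustrative usage (assumes the relevant API key is set):
#   adapter = LLMAdapter(DEBATER_A_CONFIG)
#   reply = adapter.complete("Say hello in five words.", system="Be brief.")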


class Debater:
    def __init__(self, adapter: LLMAdapter, stance_label: str):
        self.adapter = adapter
        self.stance_label = stance_label

    def argue(self, topic: str) -> str:
        prompt = (
            f"You are {self.adapter.config.name}, debating the topic:\n"
            f"'{topic}'.\n\n"
            f"Present a concise argument that {self.stance_label.lower()} "
            f"the statement. Use at most 150 words. Provide clear reasoning "
            f"and, if applicable, cite plausible evidence or examples."
        )
        return self.adapter.complete(prompt, max_tokens=300)


class Judge:
    RUBRIC = [
        "Clarity of the argument",
        "Use of evidence or examples",
        "Logical coherence",
        "Persuasiveness and impact",
    ]

    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def evaluate(self, topic: str, argument_a: str, argument_b: str) -> Dict[str, object]:
        rubric_text = "\n".join(f"- {item}" for item in self.RUBRIC)
        prompt = (
            "You are serving as an impartial debate judge.\n"
            f"Topic: {topic}\n\n"
            f"Argument from Debater A:\n{argument_a}\n\n"
            f"Argument from Debater B:\n{argument_b}\n\n"
            "Score each debater from 0-10 on the following criteria:\n"
            f"{rubric_text}\n\n"
            "Return a JSON object with this exact structure:\n"
            "{\n"
            '  "winner": "A" or "B" or "Tie",\n'
            '  "reason": "brief justification",\n'
            '  "scores": [\n'
            '    {"criterion": "...", "debater_a": 0-10, "debater_b": 0-10, "notes": "optional"}\n'
            "  ]\n"
            "}\n"
            "Ensure the JSON is valid."
        )
        raw = self.adapter.complete(prompt, max_tokens=400, json_mode=True)
        try:
            data = json.loads(raw)
            if "scores" not in data:
                raise ValueError("scores missing")
            return data
        except Exception:
            # Fallback: wrap raw text if parsing fails.
            return {"winner": "Unknown", "reason": raw, "scores": []}
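
# On success, evaluate() returns the judge's parsed JSON, e.g. (illustrative):
#   {"winner": "A", "reason": "...", "scores": [{"criterion": "...", ...}]}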


class Reporter:
    def __init__(self, adapter: LLMAdapter):
        self.adapter = adapter

    def summarize(
        self,
        topic: str,
        argument_a: str,
        argument_b: str,
        judge_result: Dict[str, object],
    ) -> str:
        prompt = (
            f"Summarize a single-round debate on '{topic}'.\n\n"
            f"Debater A argued:\n{argument_a}\n\n"
            f"Debater B argued:\n{argument_b}\n\n"
            f"Judge verdict: {json.dumps(judge_result, ensure_ascii=False)}\n\n"
            "Provide a short journalistic summary (max 200 words) highlighting "
            "each side's key points and the judge's decision. Use a neutral tone."
        )
        response = self.adapter.client.chat.completions.create(
            model=self.adapter.config.model,
            messages=[
                {"role": "system", "content": "You are an impartial debate reporter."},
                {"role": "user", "content": prompt},
            ],
            temperature=self.adapter.config.temperature,
            max_tokens=300,
            # MiniMax-specific extra body; other providers get no extras.
            **(
                {"extra_body": {"reasoning_split": True}}
                if getattr(self.adapter.client, "base_url", None)
                and "minimax" in str(self.adapter.client.base_url).lower()
                else {}
            ),
        )
        return extract_text(response)


# ---------------------------------------------------------------------------
# Debate pipeline + UI
# ---------------------------------------------------------------------------
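
# The agents below are constructed at import time, so a missing API key
# surfaces immediately as a RuntimeError from load_client().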

debater_a = Debater(LLMAdapter(DEBATER_A_CONFIG), stance_label="supports")
debater_b = Debater(LLMAdapter(DEBATER_B_CONFIG), stance_label="opposes")
judge = Judge(LLMAdapter(JUDGE_CONFIG))
reporter = Reporter(LLMAdapter(REPORTER_CONFIG))


def format_transcript(transcript: List[Tuple[str, str]]) -> str:
    """Return a markdown-formatted transcript."""
    lines = []
    for speaker, message in transcript:
        lines.append(f"### {speaker}\n\n{message}\n")
    return "\n".join(lines)
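
# e.g. format_transcript([("Moderator", "Hello")]) yields "### Moderator\n\nHello\n"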


def run_debate(
    topic: str, stance_a: str, stance_b: str
) -> Generator[Tuple[str, str, List[List[object]], str, str], None, None]:
    """Generator for Gradio to stream debate progress."""
    if not topic.strip():
        warning = "⚠️ Please enter a debate topic to get started."
        yield warning, "", [], "", ""
        return

    state = DebateState(topic=topic.strip(), stance_a=stance_a, stance_b=stance_b)

    state.transcript.append(
        ("Moderator", f"Welcome to the debate on **{state.topic}**!")
    )
    yield format_transcript(state.transcript), "Waiting for judge...", [], "", ""

    argument_a = debater_a.argue(state.topic)
    state.transcript.append((f"Debater A ({state.stance_a})", argument_a))
    yield format_transcript(state.transcript), "Collecting arguments...", [], "", ""

    argument_b = debater_b.argue(state.topic)
    state.transcript.append((f"Debater B ({state.stance_b})", argument_b))
    yield format_transcript(state.transcript), "Judge deliberating...", [], "", ""

    judge_result = judge.evaluate(state.topic, argument_a, argument_b)
    verdict_text = (
        f"Winner: {judge_result.get('winner', 'Unknown')}\nReason: "
        f"{judge_result.get('reason', 'No explanation provided.')}"
    )
    score_rows = [
        [
            entry.get("criterion", ""),
            entry.get("debater_a", ""),
            entry.get("debater_b", ""),
            entry.get("notes", ""),
        ]
        for entry in judge_result.get("scores", [])
    ]
    judge_report_md = (
        f"**Judge Verdict:** {judge_result.get('winner', 'Unknown')}\n\n"
        f"{judge_result.get('reason', '')}"
    )
    yield (
        format_transcript(state.transcript),
        judge_report_md,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )

    reporter_summary = reporter.summarize(
        state.topic, argument_a, argument_b, judge_result
    )

    final_markdown = (
        f"{judge_report_md}\n\n---\n\n"
        f"**Reporter Summary**\n\n{reporter_summary}"
    )
    yield (
        format_transcript(state.transcript),
        final_markdown,
        score_rows,
        verdict_text,
        format_transcript(state.transcript),
    )
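
# Each yielded tuple maps positionally onto the five outputs wired up below:
# (transcript_md, judge_md, score_table, verdict_box, transcript_box).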


# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------


with gr.Blocks(
    title="LLM Debate Arena",
    fill_width=True,
    theme=THEME,
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(
        "# 🔁 LLM Debate Arena\n"
        "Configure two debating agents, watch their arguments in real time, and "
        "review the judge's verdict plus a reporter summary."
    )

    with gr.Row():
        topic_input = gr.Textbox(
            label="Debate Topic",
            placeholder="e.g., Should autonomous delivery robots be allowed in city centers?",
        )
    with gr.Row():
        stance_a_input = gr.Textbox(
            label="Debater A Stance",
            value="Supports the statement",
        )
        stance_b_input = gr.Textbox(
            label="Debater B Stance",
            value="Opposes the statement",
        )

    run_button = gr.Button("Start Debate", variant="primary")

    with gr.Tab("Live Debate"):
        transcript_md = gr.Markdown(
            "### Waiting for the debate to start...",
            elem_id="live-debate-panel",
        )

    with gr.Tab("Judge's Report"):
        judge_md = gr.Markdown("Judge verdict will appear here.")
        score_table = gr.Dataframe(
            headers=["Criterion", "Debater A", "Debater B", "Notes"],
            datatype=["str", "number", "number", "str"],
            interactive=False,
        )
        verdict_box = gr.Textbox(
            label="Verdict Detail",
            interactive=False,
        )
        transcript_box = gr.Textbox(
            label="Full Transcript (for copying)",
            interactive=False,
            lines=10,
        )

    run_button.click(
        fn=run_debate,
        inputs=[topic_input, stance_a_input, stance_b_input],
        outputs=[transcript_md, judge_md, score_table, verdict_box, transcript_box],
        queue=True,
    )


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4).launch()