Add community contribution: Wikipedia Summarizer Battle (Ollama + OpenAI)

This commit is contained in:
khashayarbayati1
2025-09-25 17:11:08 -04:00
parent 3286cfb395
commit eb4df28e34
5 changed files with 508 additions and 0 deletions

View File

@@ -0,0 +1,214 @@
# imports
import os, json, ast, pathlib
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from openai import OpenAI
import traceback
from typing import List, Dict
from httpx import Timeout
# ---------- utils ----------
def openai_api_key_loader():
load_dotenv(dotenv_path=".env", override=True)
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
print("❌ No API key found. Please check your .env file.")
return False
if not api_key.startswith("sk-proj-"):
print("⚠️ API key found, but does not start with 'sk-proj-'. Check you're using the right one.")
return False
if api_key.strip() != api_key:
print("⚠️ API key has leading/trailing whitespace. Please clean it.")
return False
print("✅ API key found and looks good!")
return True
def ollama_installed_tags(base_url="http://localhost:11434"):
r = requests.get(f"{base_url}/api/tags", timeout=10)
r.raise_for_status()
return {m["name"] for m in r.json().get("models", [])}
def get_urls(file_name: str):
with open(f"{file_name}.txt", "r") as f:
content = f.read()
url_dict = ast.literal_eval(content) # expects a dict literal in the file
return url_dict
def text_from_url(url: str):
session = requests.Session()
session.headers.update({
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/117.0.0.0 Safari/537.36"
)
})
resp = session.get(url, timeout=30)
resp.raise_for_status()
soup = BeautifulSoup(resp.content, 'html.parser')
title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
body = soup.body
if not body:
return title, ""
for irrelevant in body(["script", "style", "img", "input", "noscript"]):
irrelevant.decompose()
text = body.get_text(separator="\n", strip=True)
return title, text
# ---------- contestants (Ollama) ----------
def summarize_with_model(text: str, model: str, ollama_client: OpenAI) -> str:
clipped = text[:9000] # keep it modest for small models
messages = [
{"role": "system", "content": "You are a concise, faithful web summarizer."},
{"role": "user", "content": (
"Summarize the article below in 46 bullet points. "
"Be factual, avoid speculation, and do not add information not present in the text.\n\n"
f"=== ARTICLE START ===\n{clipped}\n=== ARTICLE END ==="
)}
]
stream = ollama_client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
stream=True,
extra_body={"keep_alive": "30m", "num_ctx": 2048}
)
chunks = []
for event in stream:
delta = getattr(event.choices[0].delta, "content", None)
if delta:
chunks.append(delta)
return "".join(chunks).strip()
# ---------- judge (ChatGPT) ----------
JUDGE_MODEL = "gpt-4o-mini"
def judge_summaries(category: str, url: str, source_text: str, summaries: dict, judge_client: OpenAI) -> dict:
src = source_text[:12000]
judge_prompt = f"""
You are the referee in a web summarization contest.
Task:
1) Read the SOURCE ARTICLE (below).
2) Evaluate EACH SUMMARY on: Coverage, Accuracy/Faithfulness, Clarity/Organization, Conciseness.
3) Give a 05 integer SCORE for each model (5 best).
4) Brief rationale (12 sentences per model).
5) Choose a single WINNER (tie-break on accuracy then clarity).
Return STRICT JSON only with this schema:
{{
"category": "{category}",
"url": "{url}",
"scores": {{
"<model_name>": {{ "score": <0-5>, "rationale": "<1-2 sentences>" }}
}},
"winner": "<model_name>"
}}
SOURCE ARTICLE:
{src}
SUMMARIES:
"""
for m, s in summaries.items():
judge_prompt += f"\n--- {m} ---\n{s}\n"
messages = [
{"role": "system", "content": "You are a strict, reliable evaluation judge for summaries."},
{"role": "user", "content": judge_prompt}
]
resp = judge_client.chat.completions.create(
model=JUDGE_MODEL,
messages=messages,
response_format={"type": "json_object"},
temperature=0
)
content = resp.choices[0].message.content
try:
return json.loads(content)
except json.JSONDecodeError:
# fallback: wrap in an envelope if the model added extra text
start = content.find("{")
end = content.rfind("}")
return json.loads(content[start:end+1])
def run_battle(url_dict: Dict[str, str], ollama_client: OpenAI, judge_client: OpenAI, models: List[str]) -> List[dict]:
all_results = []
for category, url in url_dict.items():
title, text = text_from_url(url)
summaries = {}
for m in models:
try:
summaries[m] = summarize_with_model(text, m, ollama_client)
except Exception as e:
print(f"\n--- Error from {m} ---")
print(repr(e))
traceback.print_exc()
summaries[m] = f"[ERROR from {m}: {e}]"
clean_summaries = {m: s for m, s in summaries.items() if not s.startswith("[ERROR")}
verdict = judge_summaries(category, url, text, clean_summaries or summaries, judge_client)
all_results.append(verdict)
return all_results
def warmup(ollama_client: OpenAI, model: str):
try:
ollama_client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "OK"}],
temperature=0,
extra_body={"keep_alive": "30m"}
)
except Exception as e:
print(f"[warmup] {model}: {e}")
# ---------- main ----------
def main():
if not openai_api_key_loader():
return
# contestants (local Ollama)
ollama_client = OpenAI(
base_url="http://localhost:11434/v1",
api_key="ollama",
timeout=Timeout(300.0, connect=30.0) # generous read/connect timeouts
)
# judge (cloud OpenAI)
judge_client = OpenAI()
available = ollama_installed_tags()
desired = ["llama3.2:latest", "deepseek-r1:1.5b", "phi3:latest"] # keep here
models = [m for m in desired if m in available]
print("Available:", sorted(available))
print("Desired :", desired)
print("Running :", models)
if not models:
raise RuntimeError(f"No desired models installed. Have: {sorted(available)}")
url_dict = get_urls(file_name="urls")
for m in models:
warmup(ollama_client, m)
results = run_battle(url_dict, ollama_client, judge_client, models)
pathlib.Path("battle_results.json").write_text(json.dumps(results, indent=2), encoding="utf-8")
print(json.dumps(results, indent=2))
if __name__ == "__main__":
main()