215 lines
7.4 KiB
Python
215 lines
7.4 KiB
Python
# imports
|
||
import os, json, ast, pathlib
|
||
import requests
|
||
from dotenv import load_dotenv
|
||
from bs4 import BeautifulSoup
|
||
from openai import OpenAI
|
||
import traceback
|
||
from typing import List, Dict
|
||
from httpx import Timeout
|
||
|
||
|
||
# ---------- utils ----------
|
||
def openai_api_key_loader():
    """Load ``OPENAI_API_KEY`` from ``.env`` and sanity-check its shape.

    ``load_dotenv`` is called with ``override=True``, so values in ``.env``
    take precedence over variables already present in the environment.
    A one-line diagnostic is printed for every outcome.

    Returns:
        bool: ``True`` when a key is present, has no leading/trailing
        whitespace and starts with ``sk-proj-``; ``False`` otherwise.
    """
    load_dotenv(dotenv_path=".env", override=True)
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:
        print("❌ No API key found. Please check your .env file.")
        return False

    # Whitespace must be checked before the prefix: a key with leading
    # whitespace can never satisfy startswith("sk-proj-"), so testing the
    # prefix first would report the wrong problem to the user.
    if api_key.strip() != api_key:
        print("⚠️ API key has leading/trailing whitespace. Please clean it.")
        return False

    if not api_key.startswith("sk-proj-"):
        print("⚠️ API key found, but does not start with 'sk-proj-'. Check you're using the right one.")
        return False

    print("✅ API key found and looks good!")
    return True
|
||
|
||
def ollama_installed_tags(base_url="http://localhost:11434"):
    """Return the set of model names reported by an Ollama server.

    Sends ``GET {base_url}/api/tags`` with a 10-second timeout and raises,
    via ``raise_for_status()``, when the server answers with an HTTP error
    status.

    Args:
        base_url: Root URL of the Ollama HTTP API.

    Returns:
        set: The ``"name"`` value of every entry under the ``"models"`` key
        of the JSON response; empty when that key is absent.
    """
    response = requests.get(f"{base_url}/api/tags", timeout=10)
    response.raise_for_status()

    payload = response.json()
    installed = set()
    for entry in payload.get("models", []):
        installed.add(entry["name"])
    return installed
|
||
|
||
def get_urls(file_name: str):
    """Load the category -> URL mapping stored in ``<file_name>.txt``.

    The file must contain a single Python dict literal, for example
    ``{"news": "https://example.com/article"}``.  It is parsed with
    ``ast.literal_eval``, which accepts literals only and never executes
    code from the file.

    Args:
        file_name: Path of the file *without* its ``.txt`` extension.

    Returns:
        dict: The parsed mapping.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the content is not valid Python syntax.
        ValueError: If the content is not a plain literal, or is a literal
            of any type other than ``dict``.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(f"{file_name}.txt", "r", encoding="utf-8") as f:
        content = f.read()

    url_dict = ast.literal_eval(content)

    # Fail here with a clear message rather than later, when a caller tries
    # to iterate ``.items()`` on something that is not a mapping.
    if not isinstance(url_dict, dict):
        raise ValueError(
            f"{file_name}.txt must contain a dict literal, "
            f"got {type(url_dict).__name__}"
        )
    return url_dict
|
||
|
||
def text_from_url(url: str):
    """Download a web page and return its title and visible body text.

    The request is sent with a desktop-browser ``User-Agent`` header and a
    30-second timeout.

    Args:
        url: Address of the page to fetch.

    Returns:
        tuple: ``(title, text)``.  ``title`` is the stripped ``<title>``
        string, or ``"No title found"`` when the page has none.  ``text`` is
        the text of ``<body>`` with ``script``, ``style``, ``img``, ``input``
        and ``noscript`` elements removed, one fragment per line; it is
        ``""`` when the document has no ``<body>``.

    Raises:
        requests.HTTPError: Via ``raise_for_status()`` when the server
            answers with an HTTP error status.
    """
    browser_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/117.0.0.0 Safari/537.36"
        )
    }

    # Use the session as a context manager so it is closed (and its pooled
    # connections released) as soon as the page has been downloaded.
    with requests.Session() as session:
        session.headers.update(browser_headers)
        resp = session.get(url, timeout=30)
        resp.raise_for_status()
        html = resp.content

    soup = BeautifulSoup(html, "html.parser")

    title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"

    body = soup.body
    if not body:
        return title, ""

    # Drop elements that contribute no readable article text.
    for irrelevant in body(["script", "style", "img", "input", "noscript"]):
        irrelevant.decompose()

    text = body.get_text(separator="\n", strip=True)
    return title, text
|
||
|
||
# ---------- contestants (Ollama) ----------
|
||
def summarize_with_model(text: str, model: str, ollama_client: OpenAI) -> str:
    """Ask one contestant model for a bullet-point summary of ``text``.

    Only the first 9000 characters of ``text`` are sent.  The completion is
    requested with ``temperature=0`` and ``stream=True``; the streamed
    fragments are concatenated and the result is stripped.

    Args:
        text: Article text to summarize.
        model: Model name passed to the chat-completions endpoint.
        ollama_client: Client exposing ``chat.completions.create``.

    Returns:
        str: The full summary text (may be empty if nothing was streamed).
    """
    clipped = text[:9000]  # keep it modest for small models
    messages = [
        {"role": "system", "content": "You are a concise, faithful web summarizer."},
        {"role": "user", "content": (
            "Summarize the article below in 4–6 bullet points. "
            "Be factual, avoid speculation, and do not add information not present in the text.\n\n"
            f"=== ARTICLE START ===\n{clipped}\n=== ARTICLE END ==="
        )},
    ]
    stream = ollama_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        stream=True,
        extra_body={"keep_alive": "30m", "num_ctx": 2048},
    )

    chunks = []
    for event in stream:
        # A streamed chunk can arrive with an empty ``choices`` list;
        # indexing [0] on it would raise IndexError, so skip such chunks.
        if not event.choices:
            continue
        delta = getattr(event.choices[0].delta, "content", None)
        if delta:
            chunks.append(delta)
    return "".join(chunks).strip()
|
||
|
||
# ---------- judge (ChatGPT) ----------
|
||
JUDGE_MODEL = "gpt-4o-mini"


def judge_summaries(category: str, url: str, source_text: str, summaries: dict, judge_client: OpenAI) -> dict:
    """Have the judge model score the summaries and return its verdict.

    The prompt contains the first 12000 characters of ``source_text``
    followed by one ``--- <model> ---`` section per entry of ``summaries``.
    The request uses ``JUDGE_MODEL``, ``temperature=0`` and the
    ``json_object`` response format.

    Args:
        category: Label of the article, echoed into the requested schema.
        url: Address of the article, echoed into the requested schema.
        source_text: Full text of the source article.
        summaries: Mapping of model name -> summary text.
        judge_client: Client exposing ``chat.completions.create``.

    Returns:
        dict: The JSON object decoded from the judge's reply.

    Raises:
        json.JSONDecodeError: If the reply is empty or contains no
            decodable JSON object.
    """
    src = source_text[:12000]

    # JSON-escape the echoed values so a quote or backslash inside them
    # cannot corrupt the schema example shown to the judge.
    category_json = json.dumps(str(category), ensure_ascii=False)
    url_json = json.dumps(str(url), ensure_ascii=False)

    judge_prompt = f"""
You are the referee in a web summarization contest.

Task:
1) Read the SOURCE ARTICLE (below).
2) Evaluate EACH SUMMARY on: Coverage, Accuracy/Faithfulness, Clarity/Organization, Conciseness.
3) Give a 0–5 integer SCORE for each model (5 best).
4) Brief rationale (1–2 sentences per model).
5) Choose a single WINNER (tie-break on accuracy then clarity).

Return STRICT JSON only with this schema:
{{
  "category": {category_json},
  "url": {url_json},
  "scores": {{
    "<model_name>": {{ "score": <0-5>, "rationale": "<1-2 sentences>" }}
  }},
  "winner": "<model_name>"
}}

SOURCE ARTICLE:
{src}

SUMMARIES:
"""
    judge_prompt += "".join(f"\n--- {m} ---\n{s}\n" for m, s in summaries.items())

    messages = [
        {"role": "system", "content": "You are a strict, reliable evaluation judge for summaries."},
        {"role": "user", "content": judge_prompt},
    ]
    resp = judge_client.chat.completions.create(
        model=JUDGE_MODEL,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=0,
    )

    # ``content`` can be None; treat that as an empty reply so the failure
    # surfaces as a JSON decode error instead of a TypeError.
    content = resp.choices[0].message.content or ""
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # Fallback: the model wrapped the object in extra text, so try to
        # decode just the outermost {...} span.
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            # Nothing to salvage; re-raise the original decode error.
            raise
        return json.loads(content[start:end + 1])
|
||
|
||
|
||
def run_battle(url_dict: Dict[str, str], ollama_client: OpenAI, judge_client: OpenAI, models: List[str]) -> List[dict]:
    """Run the summarization contest for every URL and collect the verdicts.

    For each ``category -> url`` pair the page is fetched, every model in
    ``models`` produces a summary, and the judge scores them.

    Error handling is best-effort per item:
      * If a page cannot be fetched, the error is printed and an entry of
        the form ``{"category", "url", "error"}`` is recorded instead of a
        verdict; the remaining URLs are still processed.
      * If a model fails, the error is printed and that model is left out
        of the judging.  When every model fails, the error placeholders are
        sent to the judge instead.

    Args:
        url_dict: Mapping of category label -> article URL.
        ollama_client: Client used for the contestant models.
        judge_client: Client used for the judge model.
        models: Names of the contestant models to run.

    Returns:
        list: One dict per URL, in ``url_dict`` order.
    """
    all_results = []

    for category, url in url_dict.items():
        try:
            _title, text = text_from_url(url)
        except Exception as e:
            # One unreachable page must not discard the verdicts already
            # collected for the other URLs.
            print(f"\n--- Error fetching {url} ---")
            print(repr(e))
            traceback.print_exc()
            all_results.append({"category": category, "url": url, "error": f"[ERROR fetching page: {e}]"})
            continue

        summaries = {}
        # Track failures explicitly rather than by inspecting the summary
        # text, so a genuine summary that happens to start with "[ERROR"
        # is not mistaken for a failure.
        failed = set()

        for m in models:
            try:
                summaries[m] = summarize_with_model(text, m, ollama_client)
            except Exception as e:
                print(f"\n--- Error from {m} ---")
                print(repr(e))
                traceback.print_exc()
                summaries[m] = f"[ERROR from {m}: {e}]"
                failed.add(m)

        clean_summaries = {m: s for m, s in summaries.items() if m not in failed}
        verdict = judge_summaries(category, url, text, clean_summaries or summaries, judge_client)

        all_results.append(verdict)

    return all_results
|
||
|
||
def warmup(ollama_client: OpenAI, model: str):
    """Send a tiny chat request to ``model`` before the real run.

    The request carries ``keep_alive="30m"`` in ``extra_body``.  This is
    best-effort: any exception is printed with a ``[warmup]`` prefix and
    otherwise ignored.

    Args:
        ollama_client: Client exposing ``chat.completions.create``.
        model: Name of the model to contact.
    """
    request = {
        "model": model,
        "messages": [{"role": "user", "content": "OK"}],
        "temperature": 0,
        "extra_body": {"keep_alive": "30m"},
    }
    try:
        ollama_client.chat.completions.create(**request)
    except Exception as err:
        print(f"[warmup] {model}: {err}")
|
||
|
||
|
||
|
||
# ---------- main ----------
|
||
def main():
    """Run the whole contest and save the verdicts.

    Steps, in order: validate the OpenAI key (returning silently if it is
    rejected), build the local Ollama client and the cloud judge client,
    keep only the desired models that Ollama reports as installed, load the
    URL mapping from ``urls.txt``, warm up each model, run the battle, then
    write the results to ``battle_results.json`` and print them.

    Raises:
        RuntimeError: If none of the desired models is installed.
    """
    if not openai_api_key_loader():
        return

    # contestants (local Ollama)
    local_timeout = Timeout(300.0, connect=30.0)  # generous read/connect timeouts
    ollama_client = OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
        timeout=local_timeout,
    )
    # judge (cloud OpenAI)
    judge_client = OpenAI()

    available = ollama_installed_tags()
    desired = ["llama3.2:latest", "deepseek-r1:1.5b", "phi3:latest"]  # keep here
    models = []
    for name in desired:
        if name in available:
            models.append(name)

    print("Available:", sorted(available))
    print("Desired :", desired)
    print("Running :", models)

    if not models:
        raise RuntimeError(f"No desired models installed. Have: {sorted(available)}")

    url_dict = get_urls(file_name="urls")

    for name in models:
        warmup(ollama_client, name)
    results = run_battle(url_dict, ollama_client, judge_client, models)

    # Render once; the same text goes to the file and to stdout.
    report = json.dumps(results, indent=2)
    pathlib.Path("battle_results.json").write_text(report, encoding="utf-8")
    print(report)
|
||
|
||
# Script entry point: run the contest only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|