# imports
import ast
import json
import os
import pathlib
import traceback
from typing import Dict, List

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from httpx import Timeout
from openai import OpenAI


# ---------- utils ----------
def openai_api_key_loader():
    """Load ``.env`` and sanity-check the ``OPENAI_API_KEY`` value.

    Prints a human-readable status line for every outcome.

    Returns:
        True when a key is present, has no surrounding whitespace and starts
        with ``sk-proj-``; False otherwise.
    """
    load_dotenv(dotenv_path=".env", override=True)
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        print("❌ No API key found. Please check your .env file.")
        return False
    # Check whitespace before the prefix: a key with a leading space would
    # otherwise be reported as having the wrong prefix, hiding the real cause.
    if api_key.strip() != api_key:
        print("⚠️ API key has leading/trailing whitespace. Please clean it.")
        return False
    if not api_key.startswith("sk-proj-"):
        print("⚠️ API key found, but does not start with 'sk-proj-'. Check you're using the right one.")
        return False
    print("✅ API key found and looks good!")
    return True


def ollama_installed_tags(base_url="http://localhost:11434"):
    """Return the set of model names listed by ``GET {base_url}/api/tags``.

    Args:
        base_url: Root URL of the server to query.

    Returns:
        A set with the ``"name"`` of every entry under the ``"models"`` key of
        the JSON response (empty set when the key is absent).

    Raises:
        requests.HTTPError: If the response status is an error.
    """
    r = requests.get(f"{base_url}/api/tags", timeout=10)
    r.raise_for_status()
    return {m["name"] for m in r.json().get("models", [])}


def get_urls(file_name: str):
    """Read ``<file_name>.txt`` and parse it as a Python dict literal.

    Args:
        file_name: Path of the file without the ``.txt`` suffix.

    Returns:
        The dict parsed from the file (iterated as category -> URL by
        ``run_battle``).

    Raises:
        ValueError: If the file's literal is not a dict.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(f"{file_name}.txt", "r", encoding="utf-8") as f:
        content = f.read()
    # literal_eval only evaluates literals, never arbitrary expressions.
    url_dict = ast.literal_eval(content)  # expects a dict literal in the file
    if not isinstance(url_dict, dict):
        raise ValueError(
            f"{file_name}.txt must contain a dict literal, got {type(url_dict).__name__}"
        )
    return url_dict


def text_from_url(url: str):
    """Fetch *url* and return ``(title, text)`` extracted from its HTML.

    Args:
        url: Page to download.

    Returns:
        A tuple of the page title (``"No title found"`` when missing) and the
        visible body text, one fragment per line. The text is ``""`` when the
        document has no ``<body>``.

    Raises:
        requests.HTTPError: If the response status is an error.
    """
    # Context manager closes the session's connections when we are done.
    with requests.Session() as session:
        session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/117.0.0.0 Safari/537.36"
            )
        })
        resp = session.get(url, timeout=30)
        resp.raise_for_status()

    soup = BeautifulSoup(resp.content, 'html.parser')
    title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
    body = soup.body
    if not body:
        return title, ""
    # Drop elements that contribute no readable article text.
    for irrelevant in body(["script", "style", "img", "input", "noscript"]):
        irrelevant.decompose()
    text = body.get_text(separator="\n", strip=True)
    return title, text


# ---------- contestants (Ollama) ----------
def summarize_with_model(text: str, model: str, ollama_client: OpenAI) -> str:
    """Ask *model* for a 4–6 bullet summary of *text* and return it.

    Args:
        text: Article text; only the first 9000 characters are sent.
        model: Model name passed to the chat-completions call.
        ollama_client: Client used to issue the streaming request.

    Returns:
        The concatenated streamed content, stripped of surrounding whitespace.
    """
    clipped = text[:9000]  # keep it modest for small models
    messages = [
        {"role": "system", "content": "You are a concise, faithful web summarizer."},
        {"role": "user", "content": (
            "Summarize the article below in 4–6 bullet points. "
            "Be factual, avoid speculation, and do not add information not present in the text.\n\n"
            f"=== ARTICLE START ===\n{clipped}\n=== ARTICLE END ==="
        )}
    ]
    stream = ollama_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        stream=True,
        extra_body={"keep_alive": "30m", "num_ctx": 2048}
    )
    chunks = []
    for event in stream:
        # A streamed chunk may carry no choices (e.g. a usage-only chunk);
        # indexing [0] on it would raise IndexError.
        if not event.choices:
            continue
        delta = getattr(event.choices[0].delta, "content", None)
        if delta:
            chunks.append(delta)
    return "".join(chunks).strip()


# ---------- judge (ChatGPT) ----------
JUDGE_MODEL = "gpt-4o-mini"


def judge_summaries(category: str, url: str, source_text: str, summaries: dict, judge_client: OpenAI) -> dict:
    """Have the judge model score each summary and pick a winner.

    Args:
        category: Label echoed into the prompt's JSON schema.
        url: Source URL echoed into the prompt's JSON schema.
        source_text: Article text; only the first 12000 characters are sent.
        summaries: Mapping of model name -> summary text.
        judge_client: Client used to call ``JUDGE_MODEL``.

    Returns:
        The judge's reply parsed from JSON.

    Raises:
        json.JSONDecodeError: If the reply contains no parseable JSON object.
    """
    src = source_text[:12000]
    # NOTE(review): the empty "" key under "scores" and the empty "winner"
    # value look like a lost model-name placeholder — confirm intended text.
    prompt_header = f"""
You are the referee in a web summarization contest.

Task:
1) Read the SOURCE ARTICLE (below).
2) Evaluate EACH SUMMARY on: Coverage, Accuracy/Faithfulness, Clarity/Organization, Conciseness.
3) Give a 0–5 integer SCORE for each model (5 best).
4) Brief rationale (1–2 sentences per model).
5) Choose a single WINNER (tie-break on accuracy then clarity).

Return STRICT JSON only with this schema:
{{
  "category": "{category}",
  "url": "{url}",
  "scores": {{
    "": {{ "score": <0-5>, "rationale": "<1-2 sentences>" }}
  }},
  "winner": ""
}}

SOURCE ARTICLE:
{src}

SUMMARIES:
"""
    # One labelled section per contestant, appended after the header.
    summary_sections = "".join(
        f"\n--- {m} ---\n{s}\n" for m, s in summaries.items()
    )
    judge_prompt = prompt_header + summary_sections

    messages = [
        {"role": "system", "content": "You are a strict, reliable evaluation judge for summaries."},
        {"role": "user", "content": judge_prompt}
    ]
    resp = judge_client.chat.completions.create(
        model=JUDGE_MODEL,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=0
    )
    # content may be None; normalise so json.loads raises a decode error
    # rather than a TypeError.
    content = resp.choices[0].message.content or ""
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # fallback: salvage the outermost {...} if the model added extra text
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end <= start:
            # Nothing to salvage: surface the original decode error.
            raise
        return json.loads(content[start:end + 1])


def run_battle(url_dict: Dict[str, str], ollama_client: OpenAI, judge_client: OpenAI, models: List[str]) -> List[dict]:
    """Summarize every URL with every model and collect the judge's verdicts.

    Args:
        url_dict: Mapping of category -> URL.
        ollama_client: Client used for the contestant models.
        judge_client: Client used for the judge model.
        models: Contestant model names.

    Returns:
        One dict per URL: the judge's verdict, or — when the page could not be
        fetched — a record with empty ``scores``, ``winner`` None and an
        ``error`` message.
    """
    all_results = []
    for category, url in url_dict.items():
        # A single unreachable page should not abort the whole battle, in
        # line with how per-model failures are tolerated below.
        try:
            _title, text = text_from_url(url)
        except requests.RequestException as e:
            print(f"\n--- Error fetching {url} ---")
            print(repr(e))
            all_results.append({
                "category": category,
                "url": url,
                "scores": {},
                "winner": None,
                "error": f"[ERROR fetching {url}: {e}]",
            })
            continue

        summaries = {}
        for m in models:
            try:
                summaries[m] = summarize_with_model(text, m, ollama_client)
            except Exception as e:
                print(f"\n--- Error from {m} ---")
                print(repr(e))
                traceback.print_exc()
                summaries[m] = f"[ERROR from {m}: {e}]"

        # Judge only successful summaries; if every model failed, pass the
        # error placeholders through so the judge call still has input.
        clean_summaries = {m: s for m, s in summaries.items() if not s.startswith("[ERROR")}
        verdict = judge_summaries(category, url, text, clean_summaries or summaries, judge_client)
        all_results.append(verdict)
    return all_results


def warmup(ollama_client: OpenAI, model: str):
    """Send a trivial request to *model*; failures are printed, not raised.

    Args:
        ollama_client: Client used to issue the request.
        model: Model name to contact.
    """
    try:
        ollama_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "OK"}],
            temperature=0,
            extra_body={"keep_alive": "30m"}
        )
    except Exception as e:
        # Best-effort: a failed warmup must not stop the run.
        print(f"[warmup] {model}: {e}")


# ---------- main ----------
def main():
    """Run the summarization battle and write ``battle_results.json``.

    Returns early when the API key check fails.

    Raises:
        RuntimeError: If none of the desired models is installed.
    """
    if not openai_api_key_loader():
        return

    # contestants (local Ollama)
    ollama_client = OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
        timeout=Timeout(300.0, connect=30.0)  # generous read/connect timeouts
    )
    # judge (cloud OpenAI)
    judge_client = OpenAI()

    available = ollama_installed_tags()
    desired = ["llama3.2:latest", "deepseek-r1:1.5b", "phi3:latest"]  # keep here
    models = [m for m in desired if m in available]
    print("Available:", sorted(available))
    print("Desired :", desired)
    print("Running :", models)
    if not models:
        raise RuntimeError(f"No desired models installed. Have: {sorted(available)}")

    url_dict = get_urls(file_name="urls")
    for m in models:
        warmup(ollama_client, m)

    results = run_battle(url_dict, ollama_client, judge_client, models)
    # Serialize once; the same text is written to disk and echoed.
    rendered = json.dumps(results, indent=2)
    pathlib.Path("battle_results.json").write_text(rendered, encoding="utf-8")
    print(rendered)


if __name__ == "__main__":
    main()