# imports
import os, json, ast, pathlib
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from openai import OpenAI
import traceback
from typing import List, Dict
from httpx import Timeout
# ---------- utils ----------
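# Check that a usable OpenAI API key is present in .env before doing any work.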
def openai_api_key_loader():
    load_dotenv(dotenv_path=".env", override=True)
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        print("❌ No API key found. Please check your .env file.")
        return False
    if not api_key.startswith("sk-proj-"):
        print("⚠️ API key found, but does not start with 'sk-proj-'. Check you're using the right one.")
        return False
    if api_key.strip() != api_key:
        print("⚠️ API key has leading/trailing whitespace. Please clean it.")
        return False
    print("✅ API key found and looks good!")
    return True
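# Ask the local Ollama server which model tags are installed.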
def ollama_installed_tags(base_url="http://localhost:11434"):
    r = requests.get(f"{base_url}/api/tags", timeout=10)
    r.raise_for_status()
    return {m["name"] for m in r.json().get("models", [])}
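# Read a dict literal mapping category -> URL from "<file_name>.txt".
# Illustrative format only (the real urls.txt contents are project-specific):
#   {"business": "https://example.com/markets", "science": "https://example.com/space"}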
def get_urls(file_name: str):
    with open(f"{file_name}.txt", "r") as f:
        content = f.read()
    url_dict = ast.literal_eval(content)  # expects a dict literal in the file
    return url_dict
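# Fetch a page with a browser-like User-Agent and return (title, visible text),
# dropping script/style/img/input/noscript elements before extracting text.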
def text_from_url(url: str):
    session = requests.Session()
    session.headers.update({
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/117.0.0.0 Safari/537.36"
        )
    })
    resp = session.get(url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content, 'html.parser')
    title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
    body = soup.body
    if not body:
        return title, ""
    for irrelevant in body(["script", "style", "img", "input", "noscript"]):
        irrelevant.decompose()
    text = body.get_text(separator="\n", strip=True)
    return title, text
# ---------- contestants (Ollama) ----------
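# Stream a bullet-point summary from one local model via Ollama's OpenAI-compatible
# /v1 endpoint; the input is clipped so small-context models are not overwhelmed.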
def summarize_with_model(text: str, model: str, ollama_client: OpenAI) -> str:
    clipped = text[:9000]  # keep it modest for small models
    messages = [
        {"role": "system", "content": "You are a concise, faithful web summarizer."},
        {"role": "user", "content": (
            "Summarize the article below in 4-6 bullet points. "
            "Be factual, avoid speculation, and do not add information not present in the text.\n\n"
            f"=== ARTICLE START ===\n{clipped}\n=== ARTICLE END ==="
        )}
    ]
    stream = ollama_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        stream=True,
        extra_body={"keep_alive": "30m", "num_ctx": 2048}
    )
    chunks = []
    for event in stream:
        delta = getattr(event.choices[0].delta, "content", None)
        if delta:
            chunks.append(delta)
    return "".join(chunks).strip()
# ---------- judge (ChatGPT) ----------
JUDGE_MODEL = "gpt-4o-mini"
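# Send the source article plus every contestant summary to the judge model and
# request a strict-JSON verdict: per-model scores, brief rationales, and one winner.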
def judge_summaries(category: str, url: str, source_text: str, summaries: dict, judge_client: OpenAI) -> dict:
    src = source_text[:12000]
    judge_prompt = f"""
You are the referee in a web summarization contest.
Task:
1) Read the SOURCE ARTICLE (below).
2) Evaluate EACH SUMMARY on: Coverage, Accuracy/Faithfulness, Clarity/Organization, Conciseness.
3) Give a 0-5 integer SCORE for each model (5 best).
4) Brief rationale (1-2 sentences per model).
5) Choose a single WINNER (tie-break on accuracy then clarity).
Return STRICT JSON only with this schema:
{{
  "category": "{category}",
  "url": "{url}",
  "scores": {{
    "<model_name>": {{ "score": <0-5>, "rationale": "<1-2 sentences>" }}
  }},
  "winner": "<model_name>"
}}
SOURCE ARTICLE:
{src}
SUMMARIES:
"""
    for m, s in summaries.items():
        judge_prompt += f"\n--- {m} ---\n{s}\n"
    messages = [
        {"role": "system", "content": "You are a strict, reliable evaluation judge for summaries."},
        {"role": "user", "content": judge_prompt}
    ]
    resp = judge_client.chat.completions.create(
        model=JUDGE_MODEL,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=0
    )
    content = resp.choices[0].message.content
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # fallback: extract the outermost JSON object if the model added extra text
        start = content.find("{")
        end = content.rfind("}")
        return json.loads(content[start:end+1])
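# For each (category, url): scrape the page, collect a summary from every contestant,
# then have the judge score them. Summaries that errored are excluded from judging
# unless every model failed.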
def run_battle(url_dict: Dict[str, str], ollama_client: OpenAI, judge_client: OpenAI, models: List[str]) -> List[dict]:
    all_results = []
    for category, url in url_dict.items():
        title, text = text_from_url(url)
        summaries = {}
        for m in models:
            try:
                summaries[m] = summarize_with_model(text, m, ollama_client)
            except Exception as e:
                print(f"\n--- Error from {m} ---")
                print(repr(e))
                traceback.print_exc()
                summaries[m] = f"[ERROR from {m}: {e}]"
        clean_summaries = {m: s for m, s in summaries.items() if not s.startswith("[ERROR")}
        verdict = judge_summaries(category, url, text, clean_summaries or summaries, judge_client)
        all_results.append(verdict)
    return all_results
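# One tiny request per model so Ollama loads the weights up front and keeps them
# resident (keep_alive) instead of paying the load cost on the first real summary.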
def warmup(ollama_client: OpenAI, model: str):
    try:
        ollama_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "OK"}],
            temperature=0,
            extra_body={"keep_alive": "30m"}
        )
    except Exception as e:
        print(f"[warmup] {model}: {e}")
# ---------- main ----------
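# Wire everything together: validate the API key, build both clients, keep only the
# desired models that are actually installed, then run the battle and save the results.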
def main():
    if not openai_api_key_loader():
        return
    # contestants (local Ollama)
    ollama_client = OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
        timeout=Timeout(300.0, connect=30.0)  # generous read/connect timeouts
    )
    # judge (cloud OpenAI)
    judge_client = OpenAI()
    available = ollama_installed_tags()
    desired = ["llama3.2:latest", "deepseek-r1:1.5b", "phi3:latest"]  # edit this list to change contestants
    models = [m for m in desired if m in available]
    print("Available:", sorted(available))
    print("Desired  :", desired)
    print("Running  :", models)
    if not models:
        raise RuntimeError(f"No desired models installed. Have: {sorted(available)}")
    url_dict = get_urls(file_name="urls")
    for m in models:
        warmup(ollama_client, m)
    results = run_battle(url_dict, ollama_client, judge_client, models)
    pathlib.Path("battle_results.json").write_text(json.dumps(results, indent=2), encoding="utf-8")
    print(json.dumps(results, indent=2))
if __name__ == "__main__":
    main()
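# To run (assumptions: Ollama serving on localhost:11434 with the desired models pulled,
# a urls.txt next to this script, and OPENAI_API_KEY set in .env):
#   python this_script.py   # substitute the actual file name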