import os

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI


# Function to get the API key for OpenAI from the .env file
def get_api_key():
    """Load OPENAI_API_KEY from the environment / .env file and sanity-check it.

    A short diagnostic is printed describing the state of the key. The key is
    returned unchanged (or None when it is not set) regardless of the outcome
    of the checks; nothing is raised here.
    """
    load_dotenv(override=True)
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("No API Key found")
    elif api_key.strip() != api_key:
        # Checked before the prefix test: a key with leading whitespace would
        # otherwise be misreported as having the wrong prefix.
        print("Remove leading and trailing spaces fron the key")
    elif not api_key.startswith("sk-"):
        print("Invalid API Key. Should start with sk-")
    else:
        print("API Key found and looks good!")
    return api_key


# Load the API key and create the OpenAI client
api_key = get_api_key()
openai = OpenAI()

# Headers and class for the website to summarize
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """A fetched web page reduced to its title and visible body text.

    Attributes set by the constructor:
        url:   the URL that was requested.
        title: the text of the <title> tag, or "No title found" when the page
               has no usable title.
        text:  the text of <body> with script/style/img/input elements
               removed, one text fragment per line; "" when the page has no
               <body>.
    """

    def __init__(self, url, timeout=30):
        """Download *url* and extract its title and text.

        Args:
            url: address of the page to fetch.
            timeout: seconds passed to ``requests.get`` as its timeout so a
                stalled server cannot hang the script forever; pass None to
                wait indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # <title> may be missing, or its .string may be None (empty title or
        # nested markup); fall back to the placeholder in both cases.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # Some documents have no <body>; treat them as having no text rather
        # than failing on a None body.
        body = soup.body
        if body is None:
            self.text = ""
            return
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)


# Define prompts
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)


def user_prompt_for(website):
    """Build the user prompt asking for a markdown summary of *website*.

    The prompt names the page title, states the request, and ends with the
    page text taken from ``website.text``.
    """
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    user_prompt += website.text
    return user_prompt


# Prepare messages for use in the OpenAI call
def messages_for(website):
    """Return the system + user message list for a chat completion request."""
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)},
    ]


# Define function to summarize a given website
def summarize(url, model="gpt-4o-mini"):
    """Fetch *url* and return the model's summary of it.

    Args:
        url: address of the page to summarize.
        model: name of the chat model to call; defaults to "gpt-4o-mini".

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content


# Function to display the summary in markdown format
def display_summary(url):
    """Summarize *url*, render the result as Markdown, and also print it."""
    summary = summarize(url)
    display(Markdown(summary))
    print(summary)


url = "https://edwarddonner.com"
display_summary(url)