"""Fetch a web page and print a short markdown summary of it via OpenAI.

Originally added as week1/community-contributions/ag-w1d1-site-summary.py.
"""
import os

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI


#Function to get API key for OpenAI from .env file
def get_api_key():
    """Load ``OPENAI_API_KEY`` via dotenv and print a sanity-check message.

    The key is only inspected, never modified: whatever ``os.getenv``
    returns (possibly ``None``) is handed back to the caller.

    Returns:
        The raw value of ``OPENAI_API_KEY``, or ``None`` when it is unset.
    """
    load_dotenv(override=True)
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("No API Key found")
    elif api_key.strip() != api_key:
        # Checked before the prefix test: a key with leading whitespace
        # would otherwise be reported as not starting with "sk-".
        print("Remove leading and trailing spaces fron the key")
    elif not api_key.startswith("sk-"):
        print("Invalid API Key. Should start with sk-")
    else:
        print("API Key found and looks good!")
    return api_key


#load API key and OpenAI class
api_key = get_api_key()
openai = OpenAI()

#headers and class for website to summarize
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes set by ``__init__``: ``url``, ``title`` and ``text``.
    """

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title and text content.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` so a stalled
                server cannot block the call forever.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested markup, so fall back in that case as well.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # Documents without a <body> tag would make ``soup.body`` None;
        # use the whole parsed document as the root in that case.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)


#define prompts
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)


def user_prompt_for(website):
    """Build the user prompt from a website's title and text.

    Args:
        website: Object exposing ``title`` and ``text`` string attributes.

    Returns:
        The prompt string: a title line, the instructions, then the text.
    """
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    user_prompt += website.text
    return user_prompt


#prepare message for use in OpenAI call
def messages_for(website):
    """Return the system + user message list for the chat completion call."""
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)},
    ]


#define function to summarize a given website
def summarize(url):
    """Fetch ``url`` and return the summary produced by ``gpt-4o-mini``."""
    website = Website(url)
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(website),
    )
    return response.choices[0].message.content


#function to display summary in markdown format
def display_summary(url):
    """Summarize ``url``, render it as Markdown, and also print the raw text."""
    summary = summarize(url)
    display(Markdown(summary))
    print(summary)


url = "https://edwarddonner.com"
display_summary(url)