A python file for website summarization
This commit is contained in:
76
week1/community-contributions/ag-w1d1-site-summary.py
Normal file
76
week1/community-contributions/ag-w1d1-site-summary.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from IPython.display import Markdown, display
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
#Function to get API key for OpenAI from .env file
|
||||||
|
def get_api_key():
    """Load the OpenAI API key from the environment and report on its shape.

    Calls ``load_dotenv(override=True)`` (per python-dotenv, values from a
    ``.env`` file then take precedence over variables already set), reads
    ``OPENAI_API_KEY`` and prints a one-line diagnostic about it.

    Returns:
        The raw value of ``OPENAI_API_KEY``, or ``None`` when it is unset.
        The value is returned unchanged even when a problem was reported.
    """
    load_dotenv(override=True)
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("No API Key found")
    # Whitespace is checked before the prefix: a key with a leading space
    # would otherwise be misreported as having the wrong prefix.
    elif api_key.strip() != api_key:
        print("Remove leading and trailing spaces from the key")
    elif not api_key.startswith("sk-"):
        print("Invalid API Key. Should start with sk-")
    else:
        print("API Key found and looks good!")
    return api_key
|
||||||
|
|
||||||
|
#load API key and OpenAI class
|
||||||
|
# Validate the key up front (this prints a diagnostic), then hand it to the
# client explicitly so the value that was checked is the one that is used.
# When api_key is None the client falls back to OPENAI_API_KEY on its own.
api_key = get_api_key()
openai = OpenAI(api_key=api_key)
|
||||||
|
|
||||||
|
#headers and class for website to summarize
|
||||||
|
# Browser-like request headers sent along with every page fetch.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
|
||||||
|
class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes set by ``__init__``:
        url: The address that was requested.
        title: Text of the ``<title>`` element, or ``"No title found"``.
        text: Body text with script/style/img/input elements removed, text
            fragments joined by newlines; empty when there is no ``<body>``.
    """

    def __init__(self, url, timeout=30):
        """Download *url* and extract its title and text.

        Args:
            url: Page address passed to ``requests.get``.
            timeout: Seconds to wait on the server before ``requests`` gives
                up; without it a stalled server would block indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # soup.title.string is None for an empty <title> or one with nested
        # tags, so fall back to the placeholder in that case as well.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # No <body> element was parsed: nothing to strip or extract.
            self.text = ""
            return

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)
|
||||||
|
|
||||||
|
#define prompts
|
||||||
|
# System prompt: fixes the assistant's role and asks for a markdown summary.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)
|
||||||
|
|
||||||
|
def user_prompt_for(website):
    """Build the user message asking for a summary of *website*.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of a title line, the summarization instructions, a
        blank line, and then the page text.
    """
    intro = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + instructions + website.text
|
||||||
|
|
||||||
|
#prepare message for use in OpenAI call
|
||||||
|
def messages_for(website):
    """Assemble the chat payload for summarizing *website*.

    Returns:
        A two-element list of role/content dicts: the system prompt first,
        then the user prompt produced by ``user_prompt_for(website)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]
|
||||||
|
|
||||||
|
#define function to summarize a given website
|
||||||
|
def summarize(url, model="gpt-4o-mini"):
    """Fetch *url* and return the model's summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to call; defaults to ``"gpt-4o-mini"``,
            the value that was previously hard-coded.

    Returns:
        The message content of the first choice in the chat completion.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content
|
||||||
|
|
||||||
|
#function to display summary in markdown format
|
||||||
|
def display_summary(url):
    """Summarize *url*, then show the result.

    The summary is passed to IPython's ``display`` wrapped in ``Markdown``
    and is also printed as plain text.
    """
    markdown_text = summarize(url)
    rendered = Markdown(markdown_text)
    display(rendered)
    print(markdown_text)
|
||||||
|
|
||||||
|
url = "https://edwarddonner.com"

# Only fetch and summarize when run as a script, so that importing this
# module for its helpers does not trigger a page download and a model call.
if __name__ == "__main__":
    display_summary(url)
|
||||||
Reference in New Issue
Block a user