"""Scrape a web page with Playwright and summarize it as markdown via OpenAI.

The script renders a URL in headless Chromium, extracts the visible text
with BeautifulSoup, asks an OpenAI chat model for a markdown summary, and
writes that summary to a file.
"""

import os
import openai
from IPython.display import Markdown, display
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")  # Or set it directly


def scrape_website(url):
    """Return the HTML of *url* as rendered by headless Chromium.

    Args:
        url: Address to navigate to; passed unchanged to ``page.goto``.

    Returns:
        The value of ``page.content()`` after navigation.

    Raises:
        Whatever Playwright raises when launching, navigating, or reading
        the page; the browser is closed before the error propagates.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)
            return page.content()
        finally:
            # Close the browser even when navigation fails, so a bad URL
            # does not skip the cleanup step.
            browser.close()


def summarize_content(html_content):
    """Summarize the text of an HTML document as markdown using OpenAI.

    Args:
        html_content: HTML source to summarize.

    Returns:
        The ``content`` of the first choice's message in the chat
        completion response.
    """
    # Get only the text parts of the webpage.
    soup = BeautifulSoup(html_content, 'html.parser')
    summary_text = soup.get_text(separator=' ', strip=True)

    system_prompt = "You summarize html content as markdown."
    user_prompt = (
        "You are a helpful assistant. Summarize the following HTML webpage content in markdown with simple terms:\n\n"
        + summary_text
    )
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            # The system prompt was previously built but never sent.
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content


def save_markdown(summary, filename="summary.md", url=None):
    """Write *summary* to *filename* under a markdown heading.

    Args:
        summary: Markdown text to save; ``None`` is treated as empty text.
        filename: Destination path; the file is overwritten.
        url: Optional source URL. When truthy, the heading links to it.
    """
    if url:
        heading = f"# Summary of [{url}]({url})\n\n"
    else:
        heading = "# Summary\n\n"
    # Resolve the body before opening the file so a missing summary cannot
    # fail after the file has already been truncated.
    body = (summary or "").strip()

    with open(filename, "w", encoding="utf-8") as f:
        f.write(heading)
        f.write(body)


# 4. Main Logic
def main():
    """Prompt for a URL, summarize the page, and save it to summary.md."""
    url = input("Enter the URL to summarize: ").strip()
    if not url:
        # Nothing to navigate to; stop before launching a browser.
        print("No URL entered; nothing to summarize.")
        return
    html = scrape_website(url)
    summary = summarize_content(html)
    save_markdown(summary, filename="summary.md", url=url)
    print("✅ Summary saved to summary.md")

# 5.
Entry Point +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/week1/community-contributions/Playwright_Scrapping_Project/summary.md b/week1/community-contributions/Playwright_Scrapping_Project/summary.md new file mode 100644 index 0000000..6aa8639 --- /dev/null +++ b/week1/community-contributions/Playwright_Scrapping_Project/summary.md @@ -0,0 +1,34 @@ +# Summary of [https://www.willwight.com/](https://www.willwight.com/) + +# Will Wight - New York Times Best-Selling Author + +### Overview +Will Wight is a renowned author known for the "Cradle" series, alongside other works like "The Last Horizon" and "The Traveler's Gate Trilogy." He combines humor and storytelling in his blog and engages actively with his readers. + +### Books +- **The Last Horizon**: Currently ongoing series. +- **Cradle**: A 12-book series, now complete. +- **The Traveler's Gate Trilogy**: Completed series. +- **The Elder Empire**: Consists of two trilogies with stories happening simultaneously, totaling 6 books. + +### Recent Highlights +- **The Pilot Release**: The fourth book in "The Last Horizon" series, celebrated on July 4th, 2025. The 26th book by Will, marking a milestone as his next book will be his 27th. +- **Barnes & Noble Success**: A significant achievement of getting Will's books stocked nationwide in Barnes & Noble, marking a breakthrough for indie publishing. + +### Blog Highlights +- Will shares personal anecdotes and behind-the-scenes insights into his creative process. +- A humorous tone is used, including whimsical stories about his life and writing challenges. +- Recent experiences at Epic Universe theme park with thoughts on its design and offerings. + +### Connect +- **Mailing List**: Over 15,000 fans subscribe to receive updates on new stories and releases. +- **Hidden Gnome Publishing**: The entity behind Will's publications, working to bring his books to wider audiences. 
+ +### Extras +- **Merch**: Available for fans wanting to support and connect with Will's universe. +- **Podcast**: Offers sneak peeks, discussions, and insights into Will's works. + +### Humorous Note +Will humorously describes himself transforming into a "monstrous mongoose" during a full moon, adding a quirky touch to his persona. + +For more detailed information on books, blogs, and extras, visit Will's website and explore his engaging world of storytelling! \ No newline at end of file