LLM_Engineering_OLD/week3/community-contributions/ai-web-summarizer/summarizer/fetcher.py

import requests
from bs4 import BeautifulSoup

def fetch_web_content(url, *, timeout=10, max_chars=5000):
    """Fetch a web page and return its visible text, truncated.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block forever on an unresponsive host.
        max_chars: Maximum number of characters of extracted text to return
            (the default of 5000 keeps the text within the downstream API
            limit). Pass ``None`` to return the full text.

    Returns:
        The extracted page text as a string, or ``None`` if the request
        failed (connection error, timeout, or non-2xx HTTP status). The
        error is printed before ``None`` is returned.
    """
    # Keep the try body limited to the calls that can raise a request error.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop non-visible content explicitly so that script/style source never
    # leaks into the text, regardless of the installed BeautifulSoup version.
    for hidden_tag in soup(['script', 'style', 'template']):
        hidden_tag.decompose()

    # Extract readable text from the web page
    page_text = soup.get_text(separator=' ', strip=True)

    # Slicing with None returns the whole string, so max_chars=None disables
    # the limit.
    return page_text[:max_chars]

if __name__ == "__main__":
    # Manual smoke test: fetch a known page and show the start of its text.
    url = "https://en.wikipedia.org/wiki/Natural_language_processing"
    content = fetch_web_content(url)
    # fetch_web_content returns None when the request fails (it has already
    # printed the error), and slicing None would raise TypeError.
    if content is not None:
        print(content[:500])  # Print a sample of the content