Deleted the old kiran-text-summarizer-gpt5mini notebook and added refactored versions for OpenAI GPT-5-mini and Ollama models under week1_assignments. Introduced a reusable scrape_website.py module for web scraping logic. Updated 'day2 EXERCISE.ipynb' to set execution counts and include output for code cells, improving reproducibility and clarity.
15 lines
567 B
Python
15 lines
567 B
Python
from bs4 import BeautifulSoup
|
|
import requests
|
|
|
|
|
|
class ScrapeWebsite:
    """Fetch a web page and expose its title and visible body text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>`` element, or
            ``"No title found"`` when the page has no usable title.
        text: Newline-separated text of the page body with script, style,
            img and input elements removed; empty when the page has no body.
    """

    def __init__(self, url, headers, timeout=30):
        """Download *url* and extract its title and body text.

        Args:
            url: Address of the page to fetch.
            headers: HTTP headers passed through to ``requests.get``.
            timeout: Seconds to wait on the server before giving up. Without
                a timeout ``requests`` may block indefinitely on a stalled
                connection.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out (propagated from ``requests.get``).
        """
        self.url = url
        response = requests.get(self.url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # ``.string`` is None when <title> is empty or wraps nested tags, so
        # check the extracted value as well as the tag's presence.
        title = soup.title.string if soup.title else None
        self.title = title if title is not None else "No title found"

        # Fragments and some error responses parse without a <body>; treat
        # them as having no text instead of failing on None.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Drop elements that contribute no readable content.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)