Minor refinements

This commit is contained in:
Edward Donner
2024-12-10 22:18:25 -05:00
parent fc0305bf81
commit 38c2317c0e
5 changed files with 15 additions and 5 deletions

View File

@@ -184,6 +184,11 @@
"# A class to represent a Webpage\n",
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
"\n",
" def __init__(self, url):\n",
@@ -191,7 +196,7 @@
" Create this Website object from the given url using the BeautifulSoup library\n",
" \"\"\"\n",
" self.url = url\n",
" response = requests.get(url)\n",
" response = requests.get(url, headers=headers)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",