Disabling SSL cert validation, and suppressing warnings. Fixes issue #217

This commit is contained in:
David La Motta
2025-02-26 14:25:11 -05:00
parent 8ffd3a523f
commit 9d010328ef

View File

@@ -0,0 +1,81 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a98030af-fcd1-4d63-a36e-38ba053498fa",
"metadata": {},
"source": [
"# A Small Tweak to Week1-Day5\n",
"\n",
"If you have network restrictions (such as using a custom DNS provider, or firewall rules at work), you can disable SSL cert verification.\n",
"Once you do that and start executing your code, the output will be riddled with warnings. Thankfully, you can suppress those warnings,too.\n",
"\n",
"See the 2 lines added to the init method, below."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "106dd65e-90af-4ca8-86b6-23a41840645b",
"metadata": {},
"outputs": [],
"source": [
"# A class to represent a Webpage\n",
"\n",
"# Some websites need you to use proper headers when fetching them:\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
"}\n",
"\n",
"class Website:\n",
" \"\"\"\n",
" A utility class to represent a Website that we have scraped, now with links\n",
" \"\"\"\n",
"\n",
" def __init__(self, url):\n",
" self.url = url\n",
"\n",
" #\n",
" # If you must disable SSL cert validation, and also suppress all the warning that will come with it,\n",
" # add the 2 lines below. This comes in very handy if you have DNS/firewall restrictions; alas, use\n",
" # with caution, especially if deploying this in a non-dev environment.\n",
" requests.packages.urllib3.disable_warnings() \n",
" response = requests.get(url, headers=headers, verify=False) \n",
" # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" \n",
" self.body = response.content\n",
" soup = BeautifulSoup(self.body, 'html.parser')\n",
" self.title = soup.title.string if soup.title else \"No title found\"\n",
" if soup.body:\n",
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
" irrelevant.decompose()\n",
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
" else:\n",
" self.text = \"\"\n",
" links = [link.get('href') for link in soup.find_all('a')]\n",
" self.links = [link for link in links if link]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}