Disabling SSL cert validation, and suppressing warnings. Fixes issue #217
This commit is contained in:
81
week1/community-contributions/day5-disable-ssl.ipynb
Normal file
81
week1/community-contributions/day5-disable-ssl.ipynb
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a98030af-fcd1-4d63-a36e-38ba053498fa",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# A Small Tweak to Week1-Day5\n",
|
||||||
|
"\n",
|
||||||
|
"If you have network restrictions (such as using a custom DNS provider, or firewall rules at work), you can disable SSL cert verification.\n",
|
||||||
|
"Once you do that and start executing your code, the output will be riddled with warnings. Thankfully, you can suppress those warnings, too.\n",
|
||||||
|
"\n",
|
||||||
|
"See the 2 lines added to the `__init__` method below."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"id": "106dd65e-90af-4ca8-86b6-23a41840645b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# A class to represent a Webpage\n",
|
||||||
|
"\n",
|
||||||
|
"# Some websites need you to use proper headers when fetching them:\n",
|
||||||
|
"headers = {\n",
|
||||||
|
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"class Website:\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" A utility class to represent a Website that we have scraped, now with links\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" def __init__(self, url):\n",
|
||||||
|
" self.url = url\n",
|
||||||
|
"\n",
|
||||||
|
" #\n",
|
||||||
|
"        # If you must disable SSL cert validation, and also suppress all the warnings that come with it,\n",
|
||||||
|
"        # add the 2 lines below. This comes in very handy if you have DNS/firewall restrictions; however, use\n",
|
||||||
|
" # with caution, especially if deploying this in a non-dev environment.\n",
|
||||||
|
" requests.packages.urllib3.disable_warnings() \n",
|
||||||
|
" response = requests.get(url, headers=headers, verify=False) \n",
|
||||||
|
" # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
||||||
|
" \n",
|
||||||
|
" self.body = response.content\n",
|
||||||
|
" soup = BeautifulSoup(self.body, 'html.parser')\n",
|
||||||
|
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||||
|
" if soup.body:\n",
|
||||||
|
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||||
|
" irrelevant.decompose()\n",
|
||||||
|
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||||
|
" else:\n",
|
||||||
|
" self.text = \"\"\n",
|
||||||
|
" links = [link.get('href') for link in soup.find_all('a')]\n",
|
||||||
|
" self.links = [link for link in links if link]"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user