From 967b1c4101db91cf6023b89e671fb386b1d08d2a Mon Sep 17 00:00:00 2001
From: vinitparak
Date: Tue, 16 Sep 2025 11:30:42 -0500
Subject: [PATCH] Added my contributions to community-contributions

---
 .../URLScrapping-linkscrapping.ipynb          |  333 ++++
 .../firstpage_summary.csv                     | 1355 +++++++++++++++++
 .../website-url-scrapping-csv/requirement.txt |    6 +
 3 files changed, 1694 insertions(+)
 create mode 100644 week1/community-contributions/website-url-scrapping-csv/URLScrapping-linkscrapping.ipynb
 create mode 100644 week1/community-contributions/website-url-scrapping-csv/firstpage_summary.csv
 create mode 100644 week1/community-contributions/website-url-scrapping-csv/requirement.txt

diff --git a/week1/community-contributions/website-url-scrapping-csv/URLScrapping-linkscrapping.ipynb b/week1/community-contributions/website-url-scrapping-csv/URLScrapping-linkscrapping.ipynb
new file mode 100644
index 0000000..3039660
--- /dev/null
+++ b/week1/community-contributions/website-url-scrapping-csv/URLScrapping-linkscrapping.ipynb
@@ -0,0 +1,333 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c6227d68-b1f4-4f71-9cc6-18aa3ce54209",
+   "metadata": {},
+   "source": [
+    "# First‑Page URL Summarizer (OpenAI)\n",
+    "\n",
+    "This notebook does not crawl a whole site: it only fetches the first page of each provided URL and asks OpenAI to summarize it.\n",
+    "\n",
+    "### What it does\n",
+    "- Loads a list of URLs (provided inline or from a file)\n",
+    "- Fetches each page with `aiohttp` (HTML only)\n",
+    "- Extracts text via BeautifulSoup (basic)\n",
+    "- Calls OpenAI to produce a structured JSON summary\n",
+    "- Exports a CSV with the fetched fields (url, status, content_type, html, title, meta_description, text) plus the model's summary, category, and key_entities\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b0fe0e9-228e-461b-9a3e-f4392974c974",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (Optional) If running locally, install deps here\n",
+    "import sys, subprocess\n",
+    "def pip_install(pkgs):\n",
+    "    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", *pkgs])\n",
+    "\n",
+    "pkgs = [\n",
+    "    \"aiohttp>=3.10\",\n",
+    "    \"beautifulsoup4>=4.12\",\n",
+    "    \"lxml>=5.2\",\n",
+    "    \"pandas>=2.2\",\n",
+    "    \"python-dotenv>=1.0\",\n",
+    "    \"openai>=1.51\",\n",
+    "]\n",
+    "try:\n",
+    "    import aiohttp, bs4, lxml, pandas, dotenv, openai\n",
+    "except Exception:\n",
+    "    pip_install(pkgs)\n",
+    "print(\"Ready ✔\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86134741-0f8c-4049-894c-f31b27701da8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, asyncio, aiohttp, pandas as pd\n",
+    "from bs4 import BeautifulSoup\n",
+    "from urllib.parse import urlparse\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "\n",
+    "load_dotenv()  # reads .env if present\n",
+    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
+    "MODEL = os.getenv(\"OPENAI_DEFAULT_MODEL\", \"gpt-4.1-mini\")\n",
+    "if not OPENAI_API_KEY:\n",
+    "    print(\"Set OPENAI_API_KEY in .env or environment.\")\n",
+    "client = OpenAI(api_key=OPENAI_API_KEY)\n",
+    "\n",
+    "DEFAULT_HEADERS = {\"User-Agent\": \"FirstPageSummarizer/1.0 (+https://edwarddonner.com)\"}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b96c4ed0-4c50-4347-8cc4-22ea21e7e483",
+   "metadata": {},
+   "source": [
+    "## 1) Provide URLs\n",
+    "You can paste a small list below, or set `URLS_FILE` to a text/CSV file containing URLs (one per line or in a column named `url`).\n",
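+    "\n",
+    "For example, a hypothetical `urls.txt` would hold one URL per line:\n",
+    "```\n",
+    "https://edwarddonner.com\n",
+    "https://www.wikipedia.org/\n",
+    "```\n",
+    "A hypothetical `urls.csv` holds the same list under a `url` column header."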
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ce4aef5-8df8-4f47-91b3-c3ecc7c4c8be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "URLS_INLINE = [\n",
+    "    \"https://edwarddonner.com\"\n",
+    "]\n",
+    "URLS_FILE = None  # e.g., \"urls.txt\" or \"urls.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba9f6f25-a04c-44fe-a16c-f7b5c47ed100",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv\n",
+    "def load_urls(urls_inline, urls_file):\n",
+    "    urls = []\n",
+    "    if urls_file and os.path.exists(urls_file):\n",
+    "        if urls_file.endswith(\".csv\"):\n",
+    "            df = pd.read_csv(urls_file)\n",
+    "            if \"url\" in df.columns:\n",
+    "                urls.extend(df[\"url\"].dropna().tolist())\n",
+    "        else:\n",
+    "            with open(urls_file, \"r\", encoding=\"utf-8\") as f:\n",
+    "                for line in f:\n",
+    "                    line = line.strip()\n",
+    "                    if line:\n",
+    "                        urls.append(line)\n",
+    "    urls.extend([u for u in urls_inline if u])\n",
+    "    # de-dup while preserving order\n",
+    "    seen = set(); out = []\n",
+    "    for u in urls:\n",
+    "        if u not in seen:\n",
+    "            seen.add(u); out.append(u)\n",
+    "    return out\n",
+    "\n",
+    "URLS = load_urls(URLS_INLINE, URLS_FILE)\n",
+    "print(f\"Loaded {len(URLS)} URLs\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb3761f0-3684-4f30-92e9-869fd4556529",
+   "metadata": {},
+   "source": [
+    "## 2) Fetch first page HTML only\n",
+    "This grabs the main HTML and extracts simple metadata and body text. No link following."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a7582b6-8277-4967-9d98-8cceeeab486d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from aiohttp import ClientTimeout\n",
+    "from bs4 import BeautifulSoup\n",
+    "try:\n",
+    "    from bs4 import FeatureNotFound\n",
+    "except Exception:\n",
+    "    class FeatureNotFound(Exception):\n",
+    "        ...\n",
+    "\n",
+    "DEFAULT_HEADERS = {\"User-Agent\": \"FirstPageSummarizer/1.0 (+https://edwarddonner.com)\"}\n",
+    "\n",
+    "async def fetch_one(session, url):\n",
+    "    \"\"\"Fetch just one page (HTML if available).\"\"\"\n",
+    "    try:\n",
+    "        async with session.get(\n",
+    "            url,\n",
+    "            timeout=ClientTimeout(total=20),\n",
+    "            headers=DEFAULT_HEADERS,\n",
+    "            allow_redirects=True\n",
+    "        ) as r:\n",
+    "            ctype = r.headers.get(\"Content-Type\", \"\") or \"\"\n",
+    "            is_html = \"html\" in ctype.lower()\n",
+    "            text = await r.text(errors=\"ignore\") if is_html else \"\"\n",
+    "            return {\n",
+    "                \"url\": str(r.url),\n",
+    "                \"status\": r.status,\n",
+    "                \"content_type\": ctype,\n",
+    "                \"html\": text,\n",
+    "            }\n",
+    "    except Exception as e:\n",
+    "        return {\"url\": url, \"status\": None, \"content_type\": \"\", \"html\": \"\", \"error\": str(e)}\n",
+    "\n",
+    "def make_soup(html: str) -> BeautifulSoup:\n",
+    "    \"\"\"Try lxml parser first, fall back to built-in html.parser if missing.\"\"\"\n",
+    "    try:\n",
+    "        return BeautifulSoup(html, \"lxml\")\n",
+    "    except FeatureNotFound:\n",
+    "        return BeautifulSoup(html, \"html.parser\")\n",
+    "\n",
+    "def extract_fields(url, html):\n",
+    "    \"\"\"Extract title, meta description, and text from HTML.\"\"\"\n",
+    "    soup = make_soup(html)\n",
+    "    title = soup.title.string.strip() if soup.title and soup.title.string else \"\"\n",
+    "\n",
+    "    meta_desc = \"\"\n",
+    "    m = soup.find(\"meta\", attrs={\"name\": \"description\"})\n",
+    "    if m and m.get(\"content\"):\n",
+    "        meta_desc = m[\"content\"].strip()\n",
+    "\n",
+    "    for tag in soup([\"script\", \"style\", \"noscript\"]):\n",
+    "        tag.decompose()\n",
+    "\n",
+    "    text = soup.get_text(\" \", strip=True)\n",
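+    "    # 8000 chars ≈ 2,000 tokens (rough 4-chars/token heuristic); raise or lower to fit your model's context\n",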
" text = text[:8000] # truncate to limit token size\n", + " return title, meta_desc, text\n", + "\n", + "async def fetch_all(urls):\n", + " \"\"\"Fetch and extract fields for a list of URLs (first page only).\"\"\"\n", + " import aiohttp\n", + " out = []\n", + " async with aiohttp.ClientSession() as session:\n", + " for u in urls:\n", + " resp = await fetch_one(session, u)\n", + " if resp.get(\"html\"):\n", + " title, meta_desc, text = extract_fields(resp[\"url\"], resp[\"html\"])\n", + " resp.update({\"title\": title, \"meta_description\": meta_desc, \"text\": text})\n", + " out.append(resp)\n", + " return out\n", + "\n", + "# Example usage in notebook (if URLS is defined):\n", + "# results = await fetch_all(URLS)\n", + "# len(results), results[:1]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d30a3c6d-b208-4d6b-a5ea-e4276935a629", + "metadata": {}, + "outputs": [], + "source": [ + "URLS = [\"https://edwarddonner.com\", \"https://www.wikipedia.org/\"]\n", + "results = await fetch_all(URLS)\n", + "len(results), results[:1]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2a53f08-4374-4125-9de8-6e1060e31200", + "metadata": {}, + "outputs": [], + "source": [ + "import os, json\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "\n", + "load_dotenv()\n", + "client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n", + "MODEL = os.getenv(\"OPENAI_DEFAULT_MODEL\", \"gpt-4.1-mini\")\n", + "\n", + "SYSTEM_PROMPT = \"\"\"\n", + "You summarize a web page for migration planning. \n", + "Return JSON with:\n", + "- title: short page title\n", + "- meta_description: concise (<= 160 chars)\n", + "- summary: 3-5 bullet points as a single string\n", + "- category: one of [blog, docs, product, pricing, careers, marketing, legal, support, account, other]\n", + "- key_entities: array of 3-8 important entities/keywords\n", + "\"\"\"\n", + "\n", + "def summarize_page(row):\n", + " user = (\n", + " f\"URL: {row['url']}\\n\"\n", + " f\"{row.get('title','')}\\n\"\n", + " f\"{row.get('meta_description','')}\\n\"\n", + " f\"\\n{row.get('text','')[:6000]}\\n\"\n", + " )\n", + " resp = client.responses.create(\n", + " model=MODEL,\n", + " input=[\n", + " {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n", + " {\"role\": \"user\", \"content\": user},\n", + " ],\n", + " response_format={\"type\": \"json_object\"}\n", + " )\n", + " return json.loads(resp.output[0].content[0].text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59f7d992-e7f0-4287-bd19-f8062fefe8c3", + "metadata": {}, + "outputs": [], + "source": [ + "enriched = []\n", + "for r in results:\n", + " if r.get(\"status\") and 200 <= r[\"status\"] < 400 and \"html\" in r.get(\"content_type\",\"\").lower():\n", + " try:\n", + " data = summarize_page(r)\n", + " enriched.append({**r, **data})\n", + " except Exception as e:\n", + " enriched.append({**r, \"error\": str(e)})\n", + " else:\n", + " enriched.append({**r, \"error\": \"Non-HTML or bad status\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "822d8108-64c2-4cf1-abc5-1acd288b7574", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame(enriched)\n", + "df.to_csv(\"firstpage_summary.csv\", index=False)\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f05d05c-bf6d-4236-8767-8695e4d4618f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": 
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/week1/community-contributions/website-url-scrapping-csv/firstpage_summary.csv b/week1/community-contributions/website-url-scrapping-csv/firstpage_summary.csv
new file mode 100644
index 0000000..a21bd0b
--- /dev/null
+++ b/week1/community-contributions/website-url-scrapping-csv/firstpage_summary.csv
@@ -0,0 +1,1355 @@
+url,status,content_type,html,title,meta_description,text,error
+https://edwarddonner.com,200,text/html; charset=UTF-8,"
+[… ~90 lines of raw edwarddonner.com markup captured in the html column omitted; its visible text reappears in the text column below …]
+",Home - Edward Donner,,"Home - Edward Donner Home Connect Four Outsmart An arena that pits LLMs against each other in a battle of diplomacy and deviousness About Posts Well, hi there. I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production ( very amateur) and losing myself in Hacker News , nodding my head sagely to things I only half understand. I’m the co-founder and CTO of Nebula.io . We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt, acquired in 2021 . We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve patented our matching model, and our award-winning platform has happy customers and tons of press coverage. Connect with me for more! September 15, 2025 AI in Production: Gen AI and Agentic AI on AWS at scale May 28, 2025 Connecting my courses – become an LLM expert and leader May 18, 2025 2025 AI Executive Briefing April 21, 2025 The Complete Agentic AI Engineering Course Navigation Home Connect Four Outsmart An arena that pits LLMs against each other in a battle of diplomacy and deviousness About Posts Get in touch ed [at] edwarddonner [dot] com www.edwarddonner.com Follow me LinkedIn Twitter Facebook Subscribe to newsletter Type your email… Subscribe",Responses.create() got an unexpected keyword argument 'response_format'
+https://www.wikipedia.org/,200,text/html,"
+[… raw wikipedia.org markup captured in the html column omitted …]
+",Wikipedia,"Wikipedia is a free online encyclopedia, created and edited by volunteers around the world and hosted by the Wikimedia Foundation.","[… extracted portal text omitted: per-language article counts and language links for hundreds of Wikipedia editions, app store links, and sister-project blurbs, ending with the Creative Commons license notice …]",Responses.create() got an unexpected keyword argument 'response_format'
diff --git a/week1/community-contributions/website-url-scrapping-csv/requirement.txt b/week1/community-contributions/website-url-scrapping-csv/requirement.txt
new file mode 100644
index 0000000..ffc5bb2
--- /dev/null
+++ b/week1/community-contributions/website-url-scrapping-csv/requirement.txt
@@ -0,0 +1,6 @@
+aiohttp>=3.10
+beautifulsoup4>=4.12
+lxml>=5.2
+pandas>=2.2
+python-dotenv>=1.0
+openai>=1.51