{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## Web2Quiz: Generate a Quiz from Webpage Content\n",
        "\n",
        "Scrape a web page (static HTML via `requests`, or a JavaScript-rendered page via Selenium), then ask an LLM served through OpenRouter to generate a multiple-choice quiz from the page text."
      ],
      "metadata": {
        "id": "n3vd295elWxh"
      },
      "id": "n3vd295elWxh"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
      "metadata": {
        "id": "f4484fcf-8b39-4c3f-9674-37970ed71988"
      },
      "outputs": [],
      "source": [
        "# Upload the .env file (Colab only); it must define OPENROUTER_KEY\n",
        "from google.colab import files\n",
        "uploaded = files.upload()"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# The PyPI package that provides `from dotenv import load_dotenv` is python-dotenv\n",
        "%pip install python-dotenv\n"
      ],
      "metadata": {
        "id": "VTpN_jVbMKuk"
      },
      "id": "VTpN_jVbMKuk",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from dotenv import load_dotenv"
      ],
      "metadata": {
        "id": "twYi9eJwL2h1"
      },
      "id": "twYi9eJwL2h1",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the uploaded .env, letting its values override existing environment variables\n",
        "load_dotenv(override=True)\n",
        "api_key = os.getenv('OPENROUTER_KEY')\n",
        "\n",
        "# Check the key\n",
        "if not api_key:\n",
        "    print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
        "elif api_key.strip() != api_key:\n",
        "    print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
        "else:\n",
        "    print(\"API key found and looks good so far!\")\n"
      ],
      "metadata": {
        "id": "NRnUTEkZL2eZ"
      },
      "id": "NRnUTEkZL2eZ",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%pip install openai"
      ],
      "metadata": {
        "id": "RRuKJ_pzL2be"
      },
      "id": "RRuKJ_pzL2be",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%pip install requests beautifulsoup4 selenium"
      ],
      "metadata": {
        "id": "DWsPpdjOVPTW"
      },
      "id": "DWsPpdjOVPTW",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Third-party imports live here because this is the first cell after the installs above\n",
        "import shutil\n",
        "from tempfile import mkdtemp\n",
        "\n",
        "import requests\n",
        "from bs4 import BeautifulSoup\n",
        "from openai import OpenAI\n",
        "from selenium import webdriver\n",
        "from selenium.webdriver.chrome.options import Options\n",
        "from selenium.webdriver.common.by import By\n",
        "from selenium.webdriver.support import expected_conditions as EC\n",
        "from selenium.webdriver.support.ui import WebDriverWait\n",
        "\n",
        "\n",
        "class Website:\n",
        "    \"\"\"Fetch a web page and expose its title and visible body text.\n",
        "\n",
        "    Attributes set by ``__init__``:\n",
        "        url          -- the URL that was requested\n",
        "        title        -- text of the page <title>, or a placeholder string\n",
        "        text         -- newline-separated body text with boilerplate tags removed\n",
        "        use_selenium -- whether the page was fetched through headless Chrome\n",
        "    \"\"\"\n",
        "\n",
        "    # Tags removed from <body> before the text is extracted\n",
        "    IRRELEVANT_TAGS = [\"script\", \"style\", \"img\", \"input\", \"header\", \"footer\", \"nav\", \"aside\"]\n",
        "\n",
        "    def __init__(self, url, use_selenium=False):\n",
        "        \"\"\"\n",
        "        Create Website object from the given URL.\n",
        "        If use_selenium=True, fetch page with Selenium.\n",
        "        Otherwise, use requests + BeautifulSoup.\n",
        "        If the fetch fails, title/text hold error placeholders instead of raising.\n",
        "        \"\"\"\n",
        "        self.url = url\n",
        "        self.title = \"\"\n",
        "        self.text = \"\"\n",
        "        self.use_selenium = use_selenium\n",
        "\n",
        "        html = self._fetch_with_selenium() if self.use_selenium else self._fetch_with_requests()\n",
        "        if not html:\n",
        "            self.title = \"Error fetching page\"\n",
        "            self.text = \"Could not retrieve HTML content.\"\n",
        "            return\n",
        "\n",
        "        soup = BeautifulSoup(html, \"html.parser\")\n",
        "\n",
        "        # .string is None when <title> is empty or contains nested tags;\n",
        "        # guard it so self.title is always a usable string for the prompt.\n",
        "        if soup.title and soup.title.string:\n",
        "            self.title = soup.title.string.strip()\n",
        "        else:\n",
        "            self.title = \"No title found\"\n",
        "\n",
        "        if soup.body:\n",
        "            for irrelevant in soup.body(self.IRRELEVANT_TAGS):\n",
        "                irrelevant.decompose()\n",
        "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
        "        else:\n",
        "            self.text = \"No body tag found in the HTML.\"\n",
        "\n",
        "    # Basic HTML scraper\n",
        "    def _fetch_with_requests(self):\n",
        "        \"\"\"Fetch HTML using requests; return the body text, or None on any request error.\"\"\"\n",
        "        try:\n",
        "            headers = {\"User-Agent\": \"Mozilla/5.0\"}\n",
        "            response = requests.get(self.url, headers=headers, timeout=10)\n",
        "            response.raise_for_status()\n",
        "            return response.text\n",
        "        except requests.exceptions.RequestException as e:\n",
        "            print(f\"Error fetching with requests: {e}\")\n",
        "            return None\n",
        "\n",
        "    # Dynamic HTML scraper\n",
        "    def _fetch_with_selenium(self):\n",
        "        \"\"\"Fetch rendered HTML using headless Chrome via Selenium; return None on failure.\"\"\"\n",
        "        # Throwaway Chrome profile directory; removed in `finally` so repeated runs don't leak temp dirs\n",
        "        profile_dir = mkdtemp()\n",
        "\n",
        "        options = Options()\n",
        "        options.add_argument(\"--no-sandbox\")\n",
        "        options.add_argument(\"--disable-dev-shm-usage\")\n",
        "        options.add_argument(\"--headless\")\n",
        "        options.add_argument(f\"--user-data-dir={profile_dir}\")\n",
        "\n",
        "        driver = None\n",
        "        try:\n",
        "            driver = webdriver.Chrome(options=options)\n",
        "            driver.get(self.url)\n",
        "\n",
        "            # Wait up to 10 seconds for a <body> element to be present\n",
        "            WebDriverWait(driver, 10).until(\n",
        "                EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
        "            )\n",
        "\n",
        "            return driver.page_source\n",
        "        except Exception as e:\n",
        "            print(f\"An error occurred during Selenium fetch: {e}\")\n",
        "            return None\n",
        "        finally:\n",
        "            if driver:\n",
        "                driver.quit()\n",
        "            shutil.rmtree(profile_dir, ignore_errors=True)\n"
      ],
      "metadata": {
        "id": "PzBP0tXXcrP-"
      },
      "id": "PzBP0tXXcrP-",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "site1 = Website(\"https://en.wikipedia.org/wiki/Integration_testing\", use_selenium=False)\n",
        "print(\"Title:\", site1.title)\n",
        "print(\"Text preview:\", site1.text[:200])\n",
        "\n",
        "site2 = Website(\"https://www.tpointtech.com/java-for-loop\", use_selenium=True)\n",
        "print(\"Title:\", site2.title)\n",
        "print(\"Text preview:\", site2.text[:200])"
      ],
      "metadata": {
        "id": "vsNmh5b5c6Gq"
      },
      "id": "vsNmh5b5c6Gq",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "MODEL = \"qwen/qwen2.5-vl-72b-instruct:free\"\n",
        "N_QUESTIONS = 10\n",
        "\n",
        "# One client pointed at OpenRouter's endpoint, shared by every quiz request\n",
        "client = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=api_key)\n",
        "\n",
        "\n",
        "def generate_quiz(site, model=MODEL, n_questions=N_QUESTIONS):\n",
        "    \"\"\"Ask the LLM for a multiple-choice quiz based on a scraped ``Website``.\n",
        "\n",
        "    site        -- Website object; its ``title`` and ``text`` are placed in the prompts\n",
        "    model       -- model identifier passed to the chat completions call\n",
        "    n_questions -- number of MCQs requested in the system prompt\n",
        "\n",
        "    Returns the content of the first choice in the model's response.\n",
        "    \"\"\"\n",
        "    # Step 1: Create the prompts\n",
        "    system_prompt = (\n",
        "        \"You are a MCQ quiz generator. Analyze the provided TEXT and filter CONTENT \"\n",
        "        f\"relevant to {site.title}. Then based on the relevant CONTENT generate \"\n",
        "        f\"{n_questions} MCQs. List all correct options at the end.\"\n",
        "    )\n",
        "    user_prompt = f\"Below is provided TEXT : \\n{site.text}\"\n",
        "\n",
        "    # Step 2: Make the messages list\n",
        "    messages = [\n",
        "        {\"role\": \"system\", \"content\": system_prompt},\n",
        "        {\"role\": \"user\", \"content\": user_prompt}\n",
        "    ]\n",
        "\n",
        "    # Step 3: Call the model and return its reply\n",
        "    response = client.chat.completions.create(model=model, messages=messages)\n",
        "    return response.choices[0].message.content\n",
        "\n",
        "\n",
        "# Quiz for the statically fetched page\n",
        "print(generate_quiz(site1))"
      ],
      "metadata": {
        "collapsed": true,
        "id": "BYdc1w70QFD2"
      },
      "id": "BYdc1w70QFD2",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Quiz for the Selenium-rendered page\n",
        "print(generate_quiz(site2))"
      ],
      "metadata": {
        "id": "Rv8vxFHtQFBm"
      },
      "id": "Rv8vxFHtQFBm",
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "o5tIkQ95_2Hc"
      },
      "id": "o5tIkQ95_2Hc",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.12"
    },
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}