{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
   "metadata": {},
   "source": [
    "# Week 1 Exercise | Study Guide Generation with Llama 3.2\n",
    "\n",
    "Scrape a web page, ask a local Llama 3.2 model (reached through an OpenAI-compatible endpoint) to turn it into a JSON curriculum, then expand that curriculum into a Markdown study guide."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1070317-3ed9-4659-abe3-828943230e03",
   "metadata": {
    "editable": false,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import re\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local OpenAI-compatible endpoint; the api_key is a placeholder value, not a secret.\n",
    "openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
    "MODEL = 'llama3.2'\n",
    "REQUEST_TIMEOUT = 30  # seconds to wait for the scraped page before giving up"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5cd638a2-ab65-41cf-97bb-673c3ec117c4",
   "metadata": {},
   "source": [
    "### 1. Web Scraper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "504f3bce-f922-46a9-844a-b13d47507b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "headers = {\n",
    "    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}\n",
    "\n",
    "\n",
    "class Website:\n",
    "    \"\"\"Fetch a web page and expose its title, visible text, and links.\"\"\"\n",
    "\n",
    "    def __init__(self, url, timeout=REQUEST_TIMEOUT):\n",
    "        \"\"\"Download and parse `url`.\n",
    "\n",
    "        Args:\n",
    "            url: Address of the page to scrape.\n",
    "            timeout: Seconds to wait for the server before `requests` gives up.\n",
    "\n",
    "        Raises:\n",
    "            requests.RequestException: If the request fails, times out, or\n",
    "                returns a 4xx/5xx status.\n",
    "        \"\"\"\n",
    "        self.url = url\n",
    "        response = requests.get(url, headers=headers, timeout=timeout)\n",
    "        # Fail loudly on 4xx/5xx rather than building a curriculum from an error page.\n",
    "        response.raise_for_status()\n",
    "        self.body = response.content\n",
    "        soup = BeautifulSoup(self.body, 'html.parser')\n",
    "        # `soup.title.string` is None when <title> is empty or contains nested tags.\n",
    "        self.title = (soup.title.string if soup.title else None) or \"No title found\"\n",
    "        if soup.body:\n",
    "            # Drop elements that contribute no readable text.\n",
    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                irrelevant.decompose()\n",
    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "        else:\n",
    "            self.text = \"\"\n",
    "        # Keep only anchors that carry a non-empty href.\n",
    "        hrefs = (link.get('href') for link in soup.find_all('a'))\n",
    "        self.links = [href for href in hrefs if href]\n",
    "\n",
    "    def get_contents(self):\n",
    "        \"\"\"Return the page title and text as one prompt-ready string.\"\"\"\n",
    "        return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2bbf43c5-774d-4d4e-91ff-772781fdfeaf",
   "metadata": {},
   "source": [
    "### 2. Curriculum Extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
   "metadata": {},
   "outputs": [],
   "source": [
    "curriculum_system_prompt = \"\"\"You are provided with the text content of a webpage. \n",
    "Your task is to design a student-friendly curriculum from this content. \n",
    "Break down the material into clear modules or lessons, each with a title and a short description. \n",
    "Focus on organizing the information in a logical order, as if preparing a study plan.\n",
    "\n",
    "You should respond in JSON as in this example:\n",
    "{\n",
    "    \"curriculum\": [\n",
    "        {\n",
    "            \"module\": \"Introduction to Machine Learning\",\n",
    "            \"description\": \"Basic concepts and history of machine learning, why it matters, and common applications.\"\n",
    "        },\n",
    "        {\n",
    "            \"module\": \"Supervised Learning\",\n",
    "            \"description\": \"Learn about labeled data, classification, and regression methods.\"\n",
    "        },\n",
    "        {\n",
    "            \"module\": \"Unsupervised Learning\",\n",
    "            \"description\": \"Understand clustering, dimensionality reduction, and when to use unsupervised approaches.\"\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d89a0be8-0254-43b5-ab9a-6224069a1246",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_curriculum_user_prompt(website):\n",
    "    \"\"\"Build the user prompt that asks for a curriculum from `website.text`.\n",
    "\n",
    "    Args:\n",
    "        website: Object exposing `url` and `text` attributes (e.g. a `Website`).\n",
    "\n",
    "    Returns:\n",
    "        The prompt string: page URL, page text, then the formatting request.\n",
    "    \"\"\"\n",
    "    user_prompt = f\"Here is the text content of the website at {website.url}:\\n\\n\"\n",
    "    user_prompt += website.text\n",
    "    user_prompt += \"\\n\\nPlease create a student-friendly curriculum from this content. \"\n",
    "    user_prompt += \"Break it down into clear modules or lessons, each with a title and a short description. \"\n",
    "    user_prompt += \"Return your response in JSON format\"\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3a1b7c2-5d4f-4a6b-9c8e-2f1d0a7b6c54",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _stream_markdown(messages):\n",
    "    \"\"\"Stream a chat completion into a live-updating Markdown display.\n",
    "\n",
    "    Args:\n",
    "        messages: Chat messages as a list of `role`/`content` dicts.\n",
    "\n",
    "    Returns:\n",
    "        The full response text accumulated from the stream.\n",
    "    \"\"\"\n",
    "    stream = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=messages,\n",
    "        stream=True\n",
    "    )\n",
    "    response_text = \"\"\n",
    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
    "    for chunk in stream:\n",
    "        # Skip chunks that carry no choices so indexing [0] cannot raise.\n",
    "        if not chunk.choices:\n",
    "            continue\n",
    "        response_text += chunk.choices[0].delta.content or ''\n",
    "        update_display(Markdown(response_text), display_id=display_handle.display_id)\n",
    "    return response_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da74104c-81a3-4d12-a377-e202ddfe57bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _parse_curriculum(response_text):\n",
    "    \"\"\"Extract and decode the JSON object embedded in a model reply.\n",
    "\n",
    "    Returns:\n",
    "        The decoded object, or `{\"error\": ..., \"raw\": response_text}` when no\n",
    "        JSON object can be found or decoded.\n",
    "    \"\"\"\n",
    "    # Greedy match from the first `{` to the last `}` so surrounding prose or code fences are ignored.\n",
    "    match = re.search(r\"\\{.*\\}\", response_text, re.DOTALL)\n",
    "    if match is None:\n",
    "        print(\"Failed to parse JSON: no JSON object found in the response\")\n",
    "    else:\n",
    "        try:\n",
    "            return json.loads(match.group())\n",
    "        except json.JSONDecodeError as e:\n",
    "            print(\"Failed to parse JSON:\", e)\n",
    "    return {\"error\": \"JSON parse failed\", \"raw\": response_text}\n",
    "\n",
    "\n",
    "def get_curriculum(website):\n",
    "    \"\"\"Ask the model for a curriculum of `website` and parse its JSON reply.\n",
    "\n",
    "    The reply is streamed to the notebook output while it is generated.\n",
    "\n",
    "    Args:\n",
    "        website: Object exposing `url` and `text` attributes (e.g. a `Website`).\n",
    "\n",
    "    Returns:\n",
    "        The parsed curriculum, or a dict with `error` and `raw` keys when the\n",
    "        reply contains no decodable JSON object.\n",
    "    \"\"\"\n",
    "    response_text = _stream_markdown([\n",
    "        {\"role\": \"system\", \"content\": curriculum_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": get_curriculum_user_prompt(website)}\n",
    "    ])\n",
    "    return _parse_curriculum(response_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "df68eafc-e529-400c-a61b-0140c38909a3",
   "metadata": {},
   "source": [
    "### 3. Study Guide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b3db9d4-5edd-4a0c-8d5c-45ea455d8eb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "guide_system_prompt = \"\"\"You are an educational assistant. \n",
    "You are given a curriculum JSON with modules and descriptions.\n",
    "Your task is to create a student-friendly study guide based on this curriculum.\n",
    "- Organize the guide step by step, with clear headings, tips, and examples where appropriate.\n",
    "- Make it engaging and easy to follow.\n",
    "- Adapt the content according to the student's level, language, and tone.\n",
    "- Always respond in markdown format suitable for a student guide.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16f85360-6f06-4bb3-878a-5f3b8d8f20d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_study_guide_user_prompt(curriculum_json, student_level=\"beginner\", language=\"English\", tone=\"friendly\"):\n",
    "    \"\"\"Build the user prompt pairing the curriculum with the student's preferences.\n",
    "\n",
    "    Args:\n",
    "        curriculum_json: JSON-serialisable curriculum, embedded via `json.dumps`.\n",
    "        student_level: Level label inserted into the prompt.\n",
    "        language: Language label inserted into the prompt.\n",
    "        tone: Tone label inserted into the prompt.\n",
    "\n",
    "    Returns:\n",
    "        The formatted prompt string.\n",
    "    \"\"\"\n",
    "    return f\"\"\"\n",
    "    Student Level: {student_level}\n",
    "    Language: {language}\n",
    "    Tone: {tone}\n",
    "    \n",
    "    Here is the curriculum JSON:\n",
    "    \n",
    "    {json.dumps(curriculum_json, indent=2)}\n",
    "    \n",
    "    Please convert it into a study guide for the student.\n",
    "    \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc9b949d-df2b-475c-9a84-597a47ed6e85",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stream_study_guide(curriculum_json, student_level=\"beginner\", language=\"English\", tone=\"friendly\"):\n",
    "    \"\"\"Generate a study guide for the curriculum, streaming it to the notebook output.\n",
    "\n",
    "    Args:\n",
    "        curriculum_json: Curriculum as returned by `get_curriculum`.\n",
    "        student_level: Level label passed through to the prompt.\n",
    "        language: Language label passed through to the prompt.\n",
    "        tone: Tone label passed through to the prompt.\n",
    "\n",
    "    Returns:\n",
    "        The complete study guide text.\n",
    "    \"\"\"\n",
    "    user_prompt = get_study_guide_user_prompt(curriculum_json, student_level, language, tone)\n",
    "    return _stream_markdown([\n",
    "        {\"role\": \"system\", \"content\": guide_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": user_prompt}\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c289b7c-c991-45b5-adc3-7468af393e50",
   "metadata": {},
   "outputs": [],
   "source": [
    "page = Website(\"https://en.wikipedia.org/wiki/Rock_and_roll\")\n",
    "curriculum_json = get_curriculum(page)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c697d63-2230-4e04-a28b-c0e8fc85753e",
   "metadata": {},
   "outputs": [],
   "source": [
    "study_guide_beginner = stream_study_guide(\n",
    "    curriculum_json,\n",
    "    student_level=\"beginner\",\n",
    "    language=\"English\",\n",
    "    tone=\"friendly\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0960f87-fd29-4ae3-8405-f4fde1f50f89",
   "metadata": {},
   "outputs": [],
   "source": [
    "study_guide_advanced = stream_study_guide(\n",
    "    curriculum_json,\n",
    "    student_level=\"advanced\",\n",
    "    language=\"English\",\n",
    "    tone=\"professional, detailed\"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}