From 410f6f0410c22fbe33a46d01e01ff4cefcbba8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miray=20G=C3=BCrb=C3=BCz?= <161487392+miraygurbuz@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:31:00 +0300 Subject: [PATCH] add week 1 contributions --- .../day2_exercise_llama3.2.ipynb | 191 +++++++++++ .../week1_exercise_study_guide_llama3.2.ipynb | 309 ++++++++++++++++++ 2 files changed, 500 insertions(+) create mode 100644 week1/community-contributions/day2_exercise_llama3.2.ipynb create mode 100644 week1/community-contributions/week1_exercise_study_guide_llama3.2.ipynb diff --git a/week1/community-contributions/day2_exercise_llama3.2.ipynb b/week1/community-contributions/day2_exercise_llama3.2.ipynb new file mode 100644 index 0000000..6b84d31 --- /dev/null +++ b/week1/community-contributions/day2_exercise_llama3.2.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "786b2ed1-f82e-4ca4-8113-c4515b36e970", + "metadata": {}, + "source": [ + "# Day 2 Exercise | Website Summarizer with Llama 3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b88bf233-29e0-4c01-a4da-8a16896a95e3", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display" + ] + }, + { + "cell_type": "markdown", + "id": "f66f620e-ebf6-45d3-a710-2bb931cac841", + "metadata": {}, + "source": [ + "### 1. Scraping info from website:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e300303-02ac-4d60-9c8c-044a4627be9e", + "metadata": {}, + "outputs": [], + "source": [ + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " \"\"\"\n", + " Create this Website object from the given url using the BeautifulSoup library\n", + " \"\"\"\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " soup = BeautifulSoup(response.content, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "137714b9-24eb-4541-8f24-507dbcd09279", + "metadata": {}, + "outputs": [], + "source": [ + "ed = Website(\"https://edwarddonner.com\")" + ] + }, + { + "cell_type": "markdown", + "id": "77ba1b4b-fc4c-4e3c-bef7-c4d4281d8263", + "metadata": {}, + "source": [ + "### 2. Ollama configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97811fcb-1ceb-49a8-bfb9-2e610605c406", + "metadata": {}, + "outputs": [], + "source": [ + "OLLAMA_API = \"http://localhost:11434/api/chat\"\n", + "HEADERS = {\"Content-Type\": \"application/json\"}\n", + "MODEL = \"llama3.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "392326b8-ad0f-4bc9-b055-6220f8bcc57c", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(website):\n", + " user_prompt = f\"You are looking at a website titled {website.title}\"\n", + " user_prompt += \"\\nThe contents of this website is as follows; \\\n", + "please provide a short summary of this website in markdown. \\\n", + "If it includes news or announcements, then summarize these too.\\n\\n\"\n", + " user_prompt += website.text\n", + " return user_prompt\n", + "\n", + "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n", + "and provides a short summary, ignoring text that might be navigation related. \\\n", + "Respond in markdown.\"\n", + "user_prompt = user_prompt_for(ed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8caa94ff-5ace-4f9b-b2f0-beb6ff550636", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + "]\n", + "\n", + "payload = {\n", + " \"model\": MODEL,\n", + " \"messages\": messages,\n", + " \"stream\": False\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "f5f856bc-0437-4607-9204-5390d2dfd8db", + "metadata": {}, + "source": [ + "### 3. Get & display summary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7fd6f93-92ae-419f-b8b6-ee8214e0d93f", + "metadata": {}, + "outputs": [], + "source": [ + "response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n", + "summary = response.json()['message']['content']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78e4a433-b974-463f-82d0-b4696c63e0ab", + "metadata": {}, + "outputs": [], + "source": [ + "def display_summary(summary_text: str):\n", + " cleaned = summary_text.encode('utf-8').decode('unicode_escape')\n", + " cleaned = cleaned.strip()\n", + " display(Markdown(cleaned))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc408f1d-fe26-4bd6-859f-d18118f74ca6", + "metadata": {}, + "outputs": [], + "source": [ + "display_summary(summary)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week1/community-contributions/week1_exercise_study_guide_llama3.2.ipynb b/week1/community-contributions/week1_exercise_study_guide_llama3.2.ipynb new file mode 100644 index 0000000..c81a8a7 --- /dev/null +++ b/week1/community-contributions/week1_exercise_study_guide_llama3.2.ipynb @@ -0,0 +1,309 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5", + "metadata": {}, + "source": [ + "# Week 1 Exercise | Study Guide Generation with Llama 3.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1070317-3ed9-4659-abe3-828943230e03", + "metadata": { + "editable": false, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import re\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a456906-915a-4bfd-bb9d-57e505c5093f", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n", + "MODEL = 'llama3.2'" + ] + }, + { + "cell_type": "markdown", + "id": "5cd638a2-ab65-41cf-97bb-673c3ec117c4", + "metadata": {}, + "source": [ + "### 1. Web Scraper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "504f3bce-f922-46a9-844a-b13d47507b8a", + "metadata": {}, + "outputs": [], + "source": [ + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + "\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"" + ] + }, + { + "cell_type": "markdown", + "id": "2bbf43c5-774d-4d4e-91ff-772781fdfeaf", + "metadata": {}, + "source": [ + "### 2. Curriculum Extraction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f0d0137-52b0-47a8-81a8-11a90a010798", + "metadata": {}, + "outputs": [], + "source": [ + "curriculum_system_prompt = \"\"\"You are provided with the text content of a webpage. \n", + "Your task is to design a student-friendly curriculum from this content. \n", + "Break down the material into clear modules or lessons, each with a title and a short description. \n", + "Focus on organizing the information in a logical order, as if preparing a study plan.\n", + "\n", + "You should respond in JSON as in this example:\n", + "{\n", + " \"curriculum\": [\n", + " {\n", + " \"module\": \"Introduction to Machine Learning\",\n", + " \"description\": \"Basic concepts and history of machine learning, why it matters, and common applications.\"\n", + " },\n", + " {\n", + " \"module\": \"Supervised Learning\",\n", + " \"description\": \"Learn about labeled data, classification, and regression methods.\"\n", + " },\n", + " {\n", + " \"module\": \"Unsupervised Learning\",\n", + " \"description\": \"Understand clustering, dimensionality reduction, and when to use unsupervised approaches.\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d89a0be8-0254-43b5-ab9a-6224069a1246", + "metadata": {}, + "outputs": [], + "source": [ + "def get_curriculum_user_prompt(website):\n", + " user_prompt = f\"Here is the text content of the website at {website.url}:\\n\\n\"\n", + " user_prompt += website.text\n", + " user_prompt += \"\\n\\nPlease create a student-friendly curriculum from this content. \"\n", + " user_prompt += \"Break it down into clear modules or lessons, each with a title and a short description. \"\n", + " user_prompt += \"Return your response in JSON format\"\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da74104c-81a3-4d12-a377-e202ddfe57bc", + "metadata": {}, + "outputs": [], + "source": [ + "def get_curriculum(website):\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": curriculum_system_prompt},\n", + " {\"role\": \"user\", \"content\": get_curriculum_user_prompt(website)}\n", + " ],\n", + " stream=True\n", + " )\n", + " response_text = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " delta = chunk.choices[0].delta.content or ''\n", + " response_text += delta\n", + " update_display(Markdown(response_text), display_id=display_handle.display_id)\n", + " try:\n", + " json_text = re.search(r\"\\{.*\\}\", response_text, re.DOTALL).group()\n", + " curriculum_json = json.loads(json_text)\n", + " except Exception as e:\n", + " print(\"Failed to parse JSON:\", e)\n", + " curriculum_json = {\"error\": \"JSON parse failed\", \"raw\": response_text}\n", + "\n", + " return curriculum_json" + ] + }, + { + "cell_type": "markdown", + "id": "df68eafc-e529-400c-a61b-0140c38909a3", + "metadata": {}, + "source": [ + "### 3. Study Guide" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b3db9d4-5edd-4a0c-8d5c-45ea455d8eb0", + "metadata": {}, + "outputs": [], + "source": [ + "guide_system_prompt = \"\"\"You are an educational assistant. \n", + "You are given a curriculum JSON with modules and descriptions.\n", + "Your task is to create a student-friendly study guide based on this curriculum.\n", + "- Organize the guide step by step, with clear headings, tips, and examples where appropriate.\n", + "- Make it engaging and easy to follow.\n", + "- Adapt the content according to the student's level, language, and tone.\n", + "- Always respond in markdown format suitable for a student guide.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16f85360-6f06-4bb3-878a-5f3b8d8f20d7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_study_guide_user_prompt(curriculum_json, student_level=\"beginner\", language=\"English\", tone=\"friendly\"):\n", + " return f\"\"\"\n", + " Student Level: {student_level}\n", + " Language: {language}\n", + " Tone: {tone}\n", + " \n", + " Here is the curriculum JSON:\n", + " \n", + " {json.dumps(curriculum_json, indent=2)}\n", + " \n", + " Please convert it into a study guide for the student.\n", + " \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc9b949d-df2b-475c-9a84-597a47ed6e85", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_study_guide(curriculum_json, student_level=\"beginner\", language=\"English\", tone=\"friendly\"):\n", + " \n", + " user_prompt = get_study_guide_user_prompt(curriculum_json, student_level, language, tone)\n", + " stream = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": guide_system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ],\n", + " stream=True\n", + " )\n", + "\n", + " response_text = \"\"\n", + " display_handle = display(Markdown(\"\"), display_id=True)\n", + " for chunk in stream:\n", + " delta = chunk.choices[0].delta.content or ''\n", + " response_text += delta\n", + " update_display(Markdown(response_text), display_id=display_handle.display_id)\n", + " \n", + " return response_text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c289b7c-c991-45b5-adc3-7468af393e50", + "metadata": {}, + "outputs": [], + "source": [ + "page = Website(\"https://en.wikipedia.org/wiki/Rock_and_roll\")\n", + "curriculum_json = get_curriculum(page)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c697d63-2230-4e04-a28b-c0e8fc85753e", + "metadata": {}, + "outputs": [], + "source": [ + "study_guide_text = stream_study_guide(\n", + " curriculum_json,\n", + " student_level=\"beginner\",\n", + " language=\"English\",\n", + " tone=\"friendly\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0960f87-fd29-4ae3-8405-f4fde1f50f89", + "metadata": {}, + "outputs": [], + "source": [ + "study_guide_text = stream_study_guide(\n", + " curriculum_json,\n", + " student_level=\"advanced\",\n", + " language=\"English\",\n", + " tone=\"professional, detailed\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}