Merge branch 'ed-donner:main' into main
103
week1/community-contributions/Day-1_email_summarizers.ipynb
Normal file
@@ -0,0 +1,103 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d7a6bb51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import library\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"# Load your API key from an .env file\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7ac4cdf9",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"system_prompt = \"you are a helpful assistant that suggests an appropriate short subject line for an email based on its contents.\"\n",
|
||||
"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
"Hi John,\n",
|
||||
"I hope this email finds you well. I wanted to follow up on our meeting last week regarding the quarterly budget proposal.\n",
|
||||
"After reviewing the numbers with my team, we've identified some areas where we can reduce costs by approximately 15% without impacting our core operations. This would involve consolidating some vendor contracts and optimizing our software licensing.\n",
|
||||
"Could we schedule a meeting next week to discuss these findings in detail? I'm available Tuesday through Thursday afternoon.\n",
|
||||
"Looking forward to hearing from you.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"Sarah\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a77ca09e",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 2: Make the messages list\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8404f0fe",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a4875f7",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 4: Print the result\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
290
week1/community-contributions/Day-2_exercise_with_ollama3.ipynb
Normal file
@@ -0,0 +1,290 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "135717e7",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "29a9e634",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OPTION 1\n",
|
||||
"# using openai\n",
|
||||
"\n",
|
||||
"# message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
|
||||
"# client = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"not-needed\")\n",
|
||||
"# response = openai.chat.completions.create(model=`<name of model>`, messages=[{\"role\":\"user\", \"content\":message}])\n",
|
||||
"# print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "306993ed",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OPTION 2\n",
|
||||
"# using Ollama\n",
|
||||
"\n",
|
||||
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
|
||||
"model=\"llama3\"\n",
|
||||
"response=ollama.chat(model=model,messages=[{\"role\":\"user\",\"content\":message}])\n",
|
||||
"print(response[\"message\"][\"content\"])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "856f767b",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
|
||||
"\n",
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4ce558dc",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try one out. Change the website and add print statements to follow along.\n",
|
||||
"\n",
|
||||
"ed = Website(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5e3956f8",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "99d791b4",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "5d89b748",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "9a97d3e2",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API. You will get very familiar with this!\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = Website(url)\n",
|
||||
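"    # Uses the global 'model' defined in the Ollama cell above (\"llama3\")\n",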
" response=ollama.chat(model=model,messages=messages_for(website))\n",
|
||||
" return(response[\"message\"][\"content\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec13fe0a",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e3ade092",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "be2d49e6",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1ccbf33b",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae3d0eae",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://anthropic.com\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"Eres un analista acostumbrado a trabajar con correos electrónicos que contiene un gran conocimiento sobre la mejor manera de resumir contenido releveante \\\n",
|
||||
"dejando de lado cualquier información que no despierte interés o no sea el tema principal del correo. Tu función será leer contenido de correos y definir un listado de las 3 mejores opciones con el formato: Opción *numero de la opción*: *sujeto* Motivo: *que palabras clave dentro del texto has utilizado para llegar a esa conclusion y la relación semántica con tu idea\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
"Tengo un correo que le quiero enviar a mi profesor pero no se muy bien como llamarlo, ayudame. El correo es el siguiente:\n",
|
||||
"Hola profe,\n",
|
||||
"Ultimamente estoy disfrutando mucho sus clases y la información que presenta me parece muy importante. Este fin de semana me voy de vacaciones y no podré\n",
|
||||
"ir a sus clases la semana que viene. Me gustaría si pudiera pasarme los pdfs de la siguiente semana para echarle un vistazo por mi cuenta durante mi ausencia en Francia.\n",
|
||||
"\n",
|
||||
"Un saludo,\n",
|
||||
"Daniel.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [{\"role\" : \"system\" , \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}]\n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create( \n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages)\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,260 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2588fbba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Website Analysis and Summarization with Selenium and OpenAI\n",
|
||||
"\n",
|
||||
"> This notebook demonstrates how to extract and summarize the main content of any website using Selenium for dynamic extraction and OpenAI for generating concise summaries in Mexican Spanish.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"This notebook provides a workflow to automatically analyze websites, extract relevant text, and generate a short summary using a language model. Navigation elements are ignored, focusing on news, announcements, and main content.\n",
|
||||
"\n",
|
||||
"## Features\n",
|
||||
"- Extracts relevant text from web pages using Selenium and BeautifulSoup.\n",
|
||||
"- Generates automatic summaries using OpenAI's language models.\n",
|
||||
"- Presents results in markdown format.\n",
|
||||
"\n",
|
||||
"## Requirements\n",
|
||||
"- Python 3.8+\n",
|
||||
"- Google Chrome browser installed\n",
|
||||
"- The following Python packages:\n",
|
||||
" - selenium\n",
|
||||
" - webdriver-manager\n",
|
||||
" - beautifulsoup4\n",
|
||||
" - openai\n",
|
||||
" - python-dotenv\n",
|
||||
" - requests\n",
|
||||
"- An OpenAI API key (project key, starting with `sk-proj-`)\n",
|
||||
"- Internet connection\n",
|
||||
"\n",
|
||||
"## How to Use\n",
|
||||
"1. Install the required packages:\n",
|
||||
" ```bash\n",
|
||||
" pip install selenium webdriver-manager undetected-chromedriver beautifulsoup4 openai python-dotenv requests\n",
|
||||
" ```\n",
|
||||
"2. Add your OpenAI API key to a `.env` file as `OPENAI_API_KEY`.\n",
|
||||
"3. Run the notebook cells in order. You can change the target website URL in the code to analyze different sites.\n",
|
||||
"4. The summary will be displayed in markdown format below the code cell.\n",
|
||||
"\n",
|
||||
"**Note:** Some websites may block automated access. The notebook includes options to simulate a real user and avoid bot detection, but results may vary depending on the site's protections.\n",
|
||||
"\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc7c2ade",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Imports\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.chrome.service import Service\n",
|
||||
"from selenium.webdriver.common.by import By\n",
|
||||
"from selenium.webdriver.chrome.options import Options\n",
|
||||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||||
"from webdriver_manager.chrome import ChromeDriverManager\n",
|
||||
"import undetected_chromedriver as uc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a2d21987",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the environment variables from .env\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bbb3a8ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5313aa64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Website:\n",
|
||||
" def __init__(self, url, headless=True, wait_time=10):\n",
|
||||
" self.url = url # Website URL to analyze\n",
|
||||
" self.title = None # Title of the website\n",
|
||||
" self.text = None # Extracted text from the website\n",
|
||||
" \n",
|
||||
" # Chrome options configuration for Selenium\n",
|
||||
" options = Options()\n",
|
||||
" if headless:\n",
|
||||
" options.add_argument(\"--headless=new\") # Run Chrome in headless mode (no window)\n",
|
||||
" options.add_argument(\"--disable-gpu\") # Disable GPU acceleration\n",
|
||||
" options.add_argument(\"--no-sandbox\") # Disable Chrome sandbox (required for some environments)\n",
|
||||
" options.add_argument(\"--window-size=1920,1080\") # Set window size to simulate a real user\n",
|
||||
" # Simulate a real user-agent to avoid bot detection\n",
|
||||
" options.add_argument(\"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\")\n",
|
||||
" \n",
|
||||
" # Initialize Chrome WebDriver\n",
|
||||
" self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)\n",
|
||||
" self.driver.get(url) # Open the URL in the browser\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" # Wait until the <body> element is present in the page\n",
|
||||
" WebDriverWait(self.driver, wait_time).until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
|
||||
" html = self.driver.page_source # Get the full HTML of the page\n",
|
||||
" soup = BeautifulSoup(html, 'html.parser') # Parse HTML with BeautifulSoup\n",
|
||||
" self.title = soup.title.string if soup.title else 'No title found' # Extract the title\n",
|
||||
" if soup.body:\n",
|
||||
" # Remove irrelevant elements from the body\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" # Extract clean text from the body\n",
|
||||
" self.text = soup.body.get_text(separator='\\n', strip=True)\n",
|
||||
" else:\n",
|
||||
" self.text = \"No body found\" # If no body is found, indicate it\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error accessing the site: {e}\") # Print error to console\n",
|
||||
" self.text = \"Error accessing the site\" # Store error in the attribute\n",
|
||||
" finally:\n",
|
||||
" self.driver.quit() # Always close the browser, whether or not an error occurred"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e902c6b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown in Mexican Spanish.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eaee8f36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ac4ed8b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creates messages for the OpenAI API\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1536d537",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creates a summary for the given URL\n",
|
||||
"def summarize(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe135339",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Shows the summary for the given URL\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a301ab4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://openai.com/\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d955d75d-4970-48fe-983e-a2a850cecfc5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"import PyPDF2\n",
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.chrome.options import Options\n",
|
||||
"from selenium.webdriver.chrome.service import Service\n",
|
||||
"from webdriver_manager.chrome import ChromeDriverManager\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e1e5dd3-f91a-466b-8fd4-2dbf4eedf101",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override = True)\n",
|
||||
"api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"API key doesn't look correct, check it\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"It looks like API key has an extra space - check it\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key looks good, moving on!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67a6e583-1ef7-4b77-8886-c0e8c619933c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "34a07806-dd68-4a86-8b6e-e1b2aaf0daa1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# path to the CV\n",
|
||||
"path = \"/Users/yanasklar/Documents/For applying/CV/СV_YanaSklyar_c.pdf\"\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Vacancy:\n",
|
||||
" def __init__(self, url, instructions = \"\"):\n",
|
||||
" self.url = url\n",
|
||||
" \n",
|
||||
" # configure Chrome settings\n",
|
||||
" options = Options()\n",
|
||||
" # options.add_argument(\"--headless\") \n",
|
||||
" \"\"\"\n",
|
||||
" Headless mode runs the browser in the background (invisible).\n",
|
||||
" However, some websites (like openai.com) block headless browsers.\n",
|
||||
" So if this line is active, the page may not load correctly and you may not get the full content.\n",
|
||||
" \"\"\"\n",
|
||||
" options.add_argument(\"--disable-gpu\")\n",
|
||||
" options.add_argument(\"--no-sandbox\")\n",
|
||||
" options.add_argument(\"--window-size=1920x1080\")\n",
|
||||
"\n",
|
||||
" # use webdriver-manager to manage ChromeDriver\n",
|
||||
" service = Service(ChromeDriverManager().install())\n",
|
||||
" driver = webdriver.Chrome(service=service, options=options)\n",
|
||||
" driver.get(url)\n",
|
||||
" time.sleep(3) # let the page load\n",
|
||||
"\n",
|
||||
" # take the source of the page\n",
|
||||
" page_source = driver.page_source\n",
|
||||
" driver.quit()\n",
|
||||
"\n",
|
||||
" # analyse with BeautifulSoup\n",
|
||||
" soup = BeautifulSoup(page_source, 'html.parser')\n",
|
||||
"\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"img\", \"script\", \"style\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator='\\n', strip=True)\n",
|
||||
"\n",
|
||||
" # read CV\n",
|
||||
" with open(path, 'rb') as f:\n",
|
||||
" reader = PyPDF2.PdfReader(f)\n",
|
||||
" cv_text = \"\"\n",
|
||||
" for page in reader.pages:\n",
|
||||
" text = page.extract_text()\n",
|
||||
" if text:\n",
|
||||
" cv_text += text + \"\\n\"\n",
|
||||
" self.cv_text = cv_text\n",
|
||||
"\n",
|
||||
" # summarise and print the description of the job\n",
|
||||
" message = f\"\"\"Here is the content of a webpage: {self.text}.\n",
|
||||
" Find job description on that page,\n",
|
||||
" summarise it, include the list requirements and other important details.\n",
|
||||
" \"\"\"\n",
|
||||
" messages = [{\"role\":\"user\", \"content\":message}]\n",
|
||||
" response = openai.chat.completions.create(model='gpt-4o-mini', messages = messages)\n",
|
||||
" print(\"The job description: \", response.choices[0].message.content)\n",
|
||||
"\n",
|
||||
" # create prompts\n",
|
||||
" self.system_prompt = \"\"\"You are a career assistant specializing in writing cover letter.\n",
|
||||
" Your tasks:\n",
|
||||
" 1. Read the candidate's CV (provided as text).\n",
|
||||
" 2. Read the job description (provided from a webpage).\n",
|
||||
" 3. Write a concise and compelling cover letter, that:\n",
|
||||
" - Hightlights the most relevant experience and skills from the CV,\n",
|
||||
" - Aligns directly wit the requirements in the job description,\n",
|
||||
" - Adapts to cultural and professional norms in Israel.\n",
|
||||
" The letter should be no longer than half a page, persuasive and tailored to make the applicant stand out.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" Here is my CV:\n",
|
||||
" {self.cv_text}\n",
|
||||
" \n",
|
||||
" The job vacancy is from the website {self.title}.\n",
|
||||
" Here is the decription of the vacancy:\n",
|
||||
" {self.text}\n",
|
||||
" Please write a cover letter that connects my background to this vacancy.\n",
|
||||
" Make it persuasive and suitable for Israeli job market.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" if instructions:\n",
|
||||
" user_prompt += f\"Additional instructions: {instructions}\"\n",
|
||||
" self.user_prompt = user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9160b9f5-177b-4477-8e54-3a212f275a22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def cover_letter(url, instructions = \"\"):\n",
|
||||
" vacancy = Vacancy(url, instructions)\n",
|
||||
" messages = [\n",
|
||||
" {\"role\":\"system\", \"content\":vacancy.system_prompt},\n",
|
||||
" {\"role\":\"user\", \"content\":vacancy.user_prompt}\n",
|
||||
" ]\n",
|
||||
" response = openai.chat.completions.create(model='gpt-4o-mini', messages=messages)\n",
|
||||
" if not response:\n",
|
||||
" print(\"smt went wrong\")\n",
|
||||
" print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1de4b55c-a8da-445f-9865-c7a8bafdbc3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = \"https://www.linkedin.com/jobs/view/4285898438/?alternateChannel=search&eBP=CwEAAAGY3R5LOabDLOVTy6xvBcSlWyAkIXQz8IRkSM3rgsqTPtvcEvUSnq980O7oLV2Hh_ldTpc2cBBmRq1IRnLtp7TzEcUvndFEXeCuviA5yo7oFYfW7KoEp4SPNzmf3D9LtnSgk9Iudy3skk6n3hVOtyDpx8Zm0AiTWPvdwCaZ_w5Xu8lAG797NRNDco71ynm99LmCOC9Go7DdDQ2eLewamc4SOsA4xWcXy0GmZVy3kBF1AprK3ylAYR2wrm5-hp4lRpbbfUxXjkEOG6H_GbPpKtN-N8mYnMd9w_cej5qQmTFX86gqSi6HuXFtK0h46TbOS5r-YQksVd1Yb4kYZnDznWXPLbxp04xVJSPzsHoa05wQdOfZ2UUSoMTJmic3n3qfV2u9Bp8n4sLYtINpzKdvm4eADGGkN-nR3O2oPeas9XjGbBwNdjXHAcX_PJoRwlFdQ1gVkYQEF1T7qAfXUJoUt-fv4oLxGnIgV6yJuMgw&refId=9NA7Bvt%2FhCqDkFNRGu1dPA%3D%3D&trackingId=W11hvpcIjHA%2FjU%2FFZ%2B1uAA%3D%3D\"\n",
|
||||
"b = \"The style of the cover letter should informal, as if i talked to a friend about my background\"\n",
|
||||
"cover_letter(a, b)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0feb3cbe-686a-4a97-9ca3-a0cb32a24c5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python (llms)",
|
||||
"language": "python",
|
||||
"name": "llms"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ba06289-d17a-4ccd-85f5-2b79956d4e59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install selenium"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "935fe7b1-1807-4f75-863d-4c118e425a19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip show selenium"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eabbbc62-1de1-4883-9b3e-9c90145ea6c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.edge.options import Options as EdgeOptions # Import EdgeOptions\n",
|
||||
"from selenium.webdriver.edge.service import Service as EdgeService # Import EdgeService\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import time\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" def __init__(self, url, driver_path=None, wait_time=3):\n",
|
||||
" self.url = url\n",
|
||||
" self.wait_time = wait_time\n",
|
||||
"\n",
|
||||
" # Headless Edge settings\n",
|
||||
" options = EdgeOptions() # Use EdgeOptions\n",
|
||||
" # options.add_argument(\"--headless\")\n",
|
||||
" options.add_argument(\"--disable-gpu\")\n",
|
||||
" options.add_argument(\"--no-sandbox\")\n",
|
||||
" options.add_argument(\"--window-size=1920x1080\")\n",
|
||||
"\n",
|
||||
" # Driver path\n",
|
||||
" if driver_path:\n",
|
||||
" # For Edge, you might need to specify the path to msedgedriver\n",
|
||||
" # For driver download, https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH#downloads\n",
|
||||
" service = EdgeService(executable_path=driver_path) # Use EdgeService\n",
|
||||
" else:\n",
|
||||
" # If msedgedriver.exe is in your system's PATH, you can omit executable_path\n",
|
||||
" service = EdgeService()\n",
|
||||
"\n",
|
||||
" # Start browser\n",
|
||||
" # Use webdriver.Edge() for Microsoft Edge\n",
|
||||
" driver = webdriver.Edge(service=service, options=options)\n",
|
||||
" driver.get(url)\n",
|
||||
"\n",
|
||||
" # Wait for the loading page\n",
|
||||
" time.sleep(self.wait_time)\n",
|
||||
"\n",
|
||||
" # Take page source\n",
|
||||
" html = driver.page_source\n",
|
||||
" driver.quit()\n",
|
||||
"\n",
|
||||
" # Analysis with BeautifulSoup \n",
|
||||
" soup = BeautifulSoup(html, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
"\n",
|
||||
" # Clean irrelevant tags\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
"\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "852c52e2-bd4d-4bb9-94ef-e498c33f1a89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"site = Website(\"https://openai.com\", driver_path=\"/Users/klee/Documents/edgedriver_mac64_m1/msedgedriver\")\n",
|
||||
"print(\"Title:\", site.title)\n",
|
||||
"print(\"\\nFirst 500 character:\\n\", site.text[:500])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7620c685-c35c-4d6b-aaf1-a3da98f19ca7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
319
week1/community-contributions/day2_exercise_using_input.ipynb
Normal file
@@ -0,0 +1,319 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Welcome to your first assignment!\n",
|
||||
"\n",
|
||||
"Instructions are below. Please give this a try, and look in the solutions folder if you get stuck (or feel free to ask me!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Constants\n",
|
||||
"\n",
|
||||
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
||||
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a messages list using the same format that we used for OpenAI\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"payload = {\n",
|
||||
" \"model\": MODEL,\n",
|
||||
" \"messages\": messages,\n",
|
||||
" \"stream\": False\n",
|
||||
" }"
|
||||
]
|
||||
},
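{
"cell_type": "code",
"execution_count": null,
"id": "7f3c9a10",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: the OLLAMA_API, HEADERS and payload defined above can also be sent\n",
"# straight to the local Ollama HTTP endpoint with requests - this is the endpoint that the\n",
"# ollama package used below wraps.\n",
"\n",
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
"print(response.json()['message']['content'])"
]
},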
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import ollama\n",
|
||||
"\n",
|
||||
"response = ollama.chat(model=MODEL, messages=messages)\n",
|
||||
"print(response['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Alternative approach - using OpenAI python library to connect to Ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# There's actually an alternative approach that some people might prefer\n",
|
||||
"# You can use the OpenAI client python library to call Ollama:\n",
|
||||
"\n",
|
||||
"from openai import OpenAI\n",
|
||||
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
||||
"\n",
|
||||
"response = ollama_via_openai.chat.completions.create(\n",
|
||||
" model=MODEL,\n",
|
||||
" messages=messages\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NOW the exercise for you\n",
|
||||
"\n",
|
||||
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0c1f84c4-4cc0-4085-8ea5-871a8ca46a47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "890852ab-2cd4-41dc-b168-6bd1360b967a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"\n",
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "9d398f9a-c66e-42b5-91b4-5417944b8408",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_prompt_generator(website) -> str:\n",
|
||||
" user_prompt = f\"You will act as a website summarizer with knowledge of Web Content Accessibility Guidelines. You will look into the web: {website.title} and \"\n",
|
||||
" user_prompt += \"break down the relevant information about it in this categories: What is the website about, \\\n",
|
||||
" to whom the website belongs and what practises should improve to have a better user experience. \\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
"\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "156d7c67-b714-4156-9f69-faf0c50aaf13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def messages_generator(user_prompt : str) -> list[dict[str, str]]:\n",
|
||||
" messages = [{\"role\" : \"user\", \"content\" : user_prompt}]\n",
|
||||
"\n",
|
||||
" return messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "f07c4143-6cc5-4d28-846c-a373564e9264",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_request_reader() -> str:\n",
|
||||
" while True:\n",
|
||||
" website_url = input(\"Define what website you want to summarize by giving the url: \")\n",
|
||||
" if website_url.lower().startswith(\"http\"):\n",
|
||||
" return website_url\n",
|
||||
" print(\"URL not valid. Please provide a full url.\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "94933255-2ca8-40b5-8f74-865d3e781058",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def summarizer_bot():\n",
|
||||
" website_url = user_request_reader()\n",
|
||||
" website = Website(website_url)\n",
|
||||
" \n",
|
||||
" user_prompt = user_prompt_generator(website)\n",
|
||||
" messages = messages_generator(user_prompt)\n",
|
||||
"\n",
|
||||
" response = ollama.chat(model=MODEL, messages=messages)\n",
|
||||
" print(response['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "2d81faa4-25b3-4d5d-8f36-93772e449b5c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Define what website you want to summarize by giving the url: test.com\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"URL not valid. Please provide a full url.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Define what website you want to summarize by giving the url: https://edwarddonner.com\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"**Summary:**\n",
|
||||
"\n",
|
||||
"The website \"Home - Edward Donner\" belongs to Edward Donner, a co-founder and CTO of Nebula.io, an AI startup. The website is about Edward's interests in writing code, experimenting with Large Language Models (LLMs), and DJing, as well as his work in applying AI to help people discover their potential.\n",
|
||||
"\n",
|
||||
"**Categories:**\n",
|
||||
"\n",
|
||||
"### What is the website about?\n",
|
||||
"\n",
|
||||
"The website is primarily about Edward Donner's personal brand, showcasing his expertise in AI and LLMs. It includes information about his work at Nebula.io, which applies AI to talent management. The website also features a \"Connect Four\" arena where LLMs compete against each other, as well as sections for learning more about LLMs and staying up-to-date with Edward's courses and publications.\n",
|
||||
"\n",
|
||||
"### To whom does the website belong?\n",
|
||||
"\n",
|
||||
"The website belongs to Edward Donner, a co-founder and CTO of Nebula.io. It appears to be a personal website or blog, showcasing his expertise and interests in AI and LLMs.\n",
|
||||
"\n",
|
||||
"### Practices to improve for better user experience:\n",
|
||||
"\n",
|
||||
"1. **Clearer navigation**: The website's menu is simple but not intuitive. Adding clear categories or sections would help users quickly find the information they're looking for.\n",
|
||||
"2. **More detailed about section**: The \"About\" section provides a brief overview of Edward's work and interests, but it could be more detailed and comprehensive.\n",
|
||||
"3. **Improved accessibility**: While the website is likely following general web accessibility guidelines, there are no clear indications of this on the page. Adding alt text to images, providing a clear font size and color scheme, and ensuring sufficient contrast between background and foreground would improve the user experience for people with disabilities.\n",
|
||||
"4. **Better calls-to-action (CTAs)**: The website could benefit from more prominent CTAs, guiding users towards specific actions such as signing up for courses or following Edward on social media.\n",
|
||||
"5. **SEO optimization**: The website's content and meta tags appear to be optimized for search engines, but a more thorough SEO analysis would help identify areas for improvement.\n",
|
||||
"\n",
|
||||
"Overall, the website provides a clear overview of Edward Donner's interests and expertise in AI and LLMs, but could benefit from some tweaks to improve accessibility, navigation, and CTAs.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The call\n",
|
||||
"summarizer_bot()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,329 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ab446e4-219c-4589-aa8f-9386adcf5c60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Project Overview\n",
|
||||
"This project combines web scraping with OpenAI’s GPT models to summarize online training content. It extracts material from Microsoft’s **Quantum Computing Fundamentals** learning path, cleans it, and generates concise summaries per lesson as well as an overall course summary. \n",
|
||||
"\n",
|
||||
"## Key Features\n",
|
||||
"- Fetches and parses webpages using **requests** and **BeautifulSoup** \n",
|
||||
"- Produces summaries in multiple languages (e.g., English, Spanish, or any language) and at varying levels of detail (short, medium, detailed) \n",
|
||||
"- Summarizes individual lessons on demand or processes entire learning paths \n",
|
||||
"- Presents results as clean, structured **Markdown** directly in the notebook \n",
|
||||
"\n",
|
||||
"## Tech Stack\n",
|
||||
"- **Model**: GPT-4o-mini \n",
|
||||
"- **Language**: Python \n",
|
||||
"- **Libraries**: BeautifulSoup, OpenAI \n",
|
||||
"\n",
|
||||
"## Purpose\n",
|
||||
"This project demonstrates how AI can streamline the understanding of technical documentation and online courses by generating multilingual, customizable summaries. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables from .env file (not included)\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c5e793b2-6775-426a-a139-4848291d0463",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"\n",
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"training_website = Website(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")\n",
|
||||
"print(training_website.title)\n",
|
||||
"print(training_website.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a system prompt function that can use different language and length \n",
|
||||
"\n",
|
||||
"def build_system_prompt(language=\"Spanish\", length=\"short\"):\n",
|
||||
" return f\"\"\"You are an assistant that analyzes the contents of a website and provides a {length} summary, ignoring text that might be navigation related.\n",
|
||||
" Respond in 20 words or less markdown, and respond in {language}.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
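{
"cell_type": "code",
"execution_count": null,
"id": "c2d5e8b1",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: the language and length arguments change the generated system prompt\n",
"print(build_system_prompt(language=\"English\", length=\"detailed\"))"
]
},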
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "987c95a6-6618-4d22-a2c3-3038a9d3f154",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary in {language} of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8a846c89-81d8-4f48-9d62-7744d76694e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(user_prompt_for(training_website))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## And now let's build useful messages for GPT-4o-mini, using a function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"def messages_for(website, language=\"Spanish\", length=\"short\"):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": build_system_prompt(language, length)},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
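{
"cell_type": "code",
"execution_count": null,
"id": "messages-for-quick-check",
"metadata": {},
"outputs": [],
"source": [
"# Quick sanity check (added for illustration): inspect the messages we would send for the training page fetched earlier\n",
"messages_for(training_website)"
]
},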
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Time to bring it together - the API for OpenAI is very simple!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "425214b8-c5c5-4d7a-8b79-f9e151c9d54f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#call the OpenAI API. \n",
|
||||
"\n",
|
||||
"def summarize(url, language=\"Spanish\", length=\"short\"):\n",
|
||||
" website = Website(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=messages_for(website, language, length)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
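{
"cell_type": "code",
"execution_count": null,
"id": "get-links-from-path-sketch",
"metadata": {},
"outputs": [],
"source": [
"# NOTE: get_links_from_path() is used by summarize_training() below but isn't defined in this notebook.\n",
"# A minimal sketch, assuming lesson links on the training path page start with /en-us/training/modules/ -\n",
"# adjust the filter if the real page structure differs.\n",
"def get_links_from_path(path_url):\n",
"    response = requests.get(path_url, headers=headers)\n",
"    soup = BeautifulSoup(response.content, 'html.parser')\n",
"    links = set()\n",
"    for a in soup.find_all('a', href=True):\n",
"        href = a['href']\n",
"        if href.startswith('/en-us/training/modules/'):\n",
"            links.add('https://learn.microsoft.com' + href)\n",
"    return sorted(links)"
]
},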
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1c437357-d004-49f5-95c3-fce38aefcb5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Summarize all the lessons in microsoft quantum computer training, having the option to summarize by lesson, or the training as a whole\n",
|
||||
"\n",
|
||||
"def summarize_training(path_url, language=\"Spanish\", length=\"short\"):\n",
|
||||
" links = get_links_from_path(path_url)\n",
|
||||
" print(f\"Found {len(links)} lessons\")\n",
|
||||
"\n",
|
||||
" all_summaries = []\n",
|
||||
"\n",
|
||||
" for link in links:\n",
|
||||
" print(f\"Summarizing {link}...\")\n",
|
||||
" summary = summarize(link, language, length)\n",
|
||||
" all_summaries.append(f\"### {link}\\n{summary}\\n\")\n",
|
||||
"\n",
|
||||
" combined_prompt = \"Here are summaries of each lesson:\\n\\n\" + \"\\n\".join(all_summaries)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": build_system_prompt(language, length)},\n",
|
||||
" {\"role\": \"user\", \"content\": \"Please summarize the entire training path based on these lesson summaries:\\n\\n\" + combined_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" return \"\\n\".join(all_summaries) + \"\\n\\n## General Course Summary\\n\" + response.choices[0].message.content\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
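{
"cell_type": "code",
"execution_count": null,
"id": "summarize-training-usage",
"metadata": {},
"outputs": [],
"source": [
"# Uncomment to summarize every lesson and then the whole path - note this makes one API call per lesson\n",
"# print(summarize_training(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\"))"
]
},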
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
337
week1/community-contributions/week-1_exercise.ipynb
Normal file
337
week1/community-contributions/week-1_exercise.ipynb
Normal file
@@ -0,0 +1,337 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64d2e4a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# End of Week 1 Exercise\n",
|
||||
"\n",
|
||||
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question,\n",
|
||||
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e62b915e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from openai import OpenAI\n",
|
||||
"import ollama\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import os\n",
|
||||
"from IPython.display import display, update_display, Markdown"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "8bdfc47a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_GPT = 'gpt-4o-mini'\n",
|
||||
"MODEL_LLAMA = 'llama3'\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"openai=OpenAI()"
|
||||
]
|
||||
},
|
||||
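{
"cell_type": "code",
"execution_count": null,
"id": "api-key-sanity-check",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check, mirroring the earlier notebooks: confirm the key was actually loaded\n",
"if not api_key:\n",
"    print(\"No API key was found - please check your .env file\")\n",
"elif api_key.strip() != api_key:\n",
"    print(\"The API key has leading or trailing whitespace - please remove it\")\n",
"else:\n",
"    print(\"API key found and looks good so far!\")"
]
},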
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "57983d03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_messages(prompt=\"Describe some of the business applications of Generative AI\"):\n",
|
||||
" \"\"\"Create properly formatted messages for API calls\"\"\"\n",
|
||||
" messages = [\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": \"You are a helpful technical assistant that provides clear, detailed explanations for technical questions.\"\n",
|
||||
" },\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ]\n",
|
||||
" return messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "a6bcb94d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def answer_with_openai(prompt=\"Describe some of the business applications of Generative AI\"):\n",
|
||||
" \"\"\"Get answer using OpenAI API and print in stream\"\"\"\n",
|
||||
" try:\n",
|
||||
" messages = create_messages(prompt)\n",
|
||||
" stream = openai.chat.completions.create(\n",
|
||||
" model=MODEL_GPT,\n",
|
||||
" messages=messages,\n",
|
||||
" temperature=0.7,\n",
|
||||
" stream=True\n",
|
||||
" )\n",
|
||||
" answer = \"\"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
" for chunk in stream:\n",
|
||||
" if chunk.choices[0].delta.content:\n",
|
||||
" answer += chunk.choices[0].delta.content\n",
|
||||
" # Clean up markdown formatting for display\n",
|
||||
" clean_answer = answer.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(clean_answer), display_id=display_handle.display_id)\n",
|
||||
" return answer\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error with OpenAI: {str(e)}\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e96159ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def answer_with_ollama(prompt=\"Describe some of the business applications of Generative AI\"):\n",
|
||||
" \"\"\"Get answer using Ollama API and print in stream\"\"\"\n",
|
||||
" try:\n",
|
||||
" messages = create_messages(prompt)\n",
|
||||
" stream = ollama.chat(\n",
|
||||
" model=MODEL_LLAMA,\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True\n",
|
||||
" )\n",
|
||||
" answer = \"\"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
" for chunk in stream:\n",
|
||||
" if chunk['message']['content']:\n",
|
||||
" answer += chunk['message']['content']\n",
|
||||
" # Clean up markdown formatting for display\n",
|
||||
" clean_answer = answer.replace(\"```\", \"\").replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(clean_answer), display_id=display_handle.display_id)\n",
|
||||
" return answer\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error with Ollama: {str(e)}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "ab72f8b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def technical_qa_tool(question, use_openai=True, use_ollama=True):\n",
|
||||
" \"\"\"Main function to get technical explanations from both APIs\"\"\"\n",
|
||||
" print(f\"Question: {question}\")\n",
|
||||
" print(\"=\" * 80)\n",
|
||||
" \n",
|
||||
" if use_openai:\n",
|
||||
" print(\"\\n🤖 OpenAI Response:\")\n",
|
||||
" print(\"-\" * 40)\n",
|
||||
" answer_with_openai(question)\n",
|
||||
" \n",
|
||||
" if use_ollama:\n",
|
||||
" print(\"\\n🦙 Ollama Response:\")\n",
|
||||
" print(\"-\" * 40)\n",
|
||||
" answer_with_ollama(question)\n",
|
||||
" # display(Markdown(ollama_answer))\n",
|
||||
" \n",
|
||||
" print(\"\\n\" + \"=\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "1a6aa4a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Question: What is the difference between supervised and unsupervised machine learning?\n",
|
||||
"================================================================================\n",
|
||||
"\n",
|
||||
"🤖 OpenAI Response:\n",
|
||||
"----------------------------------------\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"Supervised and unsupervised machine learning are two primary categories of machine learning techniques, and they differ mainly in how they learn from data and the type of problems they are used to solve. Here’s a detailed explanation of each:\n",
|
||||
"\n",
|
||||
"### Supervised Machine Learning\n",
|
||||
"\n",
|
||||
"**Definition**: In supervised learning, the model is trained on a labeled dataset, meaning that each training example is paired with an output label. The goal is to learn a mapping from inputs (features) to the output labels.\n",
|
||||
"\n",
|
||||
"**Characteristics**:\n",
|
||||
"- **Labeled Data**: Requires a dataset that includes both the input features and the corresponding output labels.\n",
|
||||
"- **Objective**: The objective is to predict the output for new, unseen data based on the learned mapping from the training data.\n",
|
||||
"- **Common Techniques**:\n",
|
||||
" - **Regression**: For predicting continuous values (e.g., predicting house prices).\n",
|
||||
" - **Classification**: For predicting discrete labels (e.g., spam detection in emails).\n",
|
||||
"- **Examples**:\n",
|
||||
" - Predicting whether an email is spam or not based on various features (classification).\n",
|
||||
" - Forecasting sales figures based on historical sales data (regression).\n",
|
||||
"\n",
|
||||
"### Unsupervised Machine Learning\n",
|
||||
"\n",
|
||||
"**Definition**: In unsupervised learning, the model is trained on data that is not labeled, meaning that it does not have predefined output labels. The goal is to discover patterns, groupings, or structures within the data.\n",
|
||||
"\n",
|
||||
"**Characteristics**:\n",
|
||||
"- **Unlabeled Data**: Works with datasets that only have input features without any associated output labels.\n",
|
||||
"- **Objective**: The objective is to explore the data and find hidden patterns or intrinsic structures without specific guidance.\n",
|
||||
"- **Common Techniques**:\n",
|
||||
" - **Clustering**: Grouping similar data points together (e.g., customer segmentation).\n",
|
||||
" - **Dimensionality Reduction**: Reducing the number of features while retaining essential information (e.g., PCA - Principal Component Analysis).\n",
|
||||
"- **Examples**:\n",
|
||||
" - Grouping customers into segments based on purchasing behavior (clustering).\n",
|
||||
" - Reducing the dimensionality of a dataset to visualize it in two or three dimensions (dimensionality reduction).\n",
|
||||
"\n",
|
||||
"### Key Differences\n",
|
||||
"\n",
|
||||
"1. **Data Type**:\n",
|
||||
" - Supervised Learning: Requires labeled data.\n",
|
||||
" - Unsupervised Learning: Works with unlabeled data.\n",
|
||||
"\n",
|
||||
"2. **Goal**:\n",
|
||||
" - Supervised Learning: To learn a function that maps inputs to the correct outputs.\n",
|
||||
" - Unsupervised Learning: To identify patterns or groupings in the input data.\n",
|
||||
"\n",
|
||||
"3. **Applications**:\n",
|
||||
" - Supervised Learning: Typically used in scenarios where past data with known outcomes is available (e.g., fraud detection, image classification).\n",
|
||||
" - Unsupervised Learning: Used for exploratory data analysis or when the outcome is not known (e.g., market basket analysis, anomaly detection).\n",
|
||||
"\n",
|
||||
"In summary, the primary difference between supervised and unsupervised machine learning lies in the presence or absence of labeled data and the objectives of the learning process. Supervised learning aims to predict outcomes based on existing labels, while unsupervised learning seeks to identify hidden structures in data without predefined labels."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"🦙 Ollama Response:\n",
|
||||
"----------------------------------------\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"In machine learning, there are two main categories: supervised and unsupervised learning. The key difference lies in the type of data used to train the model and the goal of the learning process.\n",
|
||||
"\n",
|
||||
"**Supervised Learning**\n",
|
||||
"\n",
|
||||
"In supervised learning, you have a labeled dataset that contains both input data (features) and corresponding output labels or target variables. The goal is to learn a mapping between the input data and the output labels so that the model can make accurate predictions on new, unseen data.\n",
|
||||
"\n",
|
||||
"Here are some characteristics of supervised learning:\n",
|
||||
"\n",
|
||||
"1. Labeled training data: You have a dataset with input data and corresponding output labels.\n",
|
||||
"2. Specific goal: You want to predict the output label for a given input instance.\n",
|
||||
"3. Model evaluation: You evaluate the performance of your model using metrics like accuracy, precision, recall, F1 score, etc.\n",
|
||||
"\n",
|
||||
"Examples of supervised learning tasks include:\n",
|
||||
"\n",
|
||||
"* Image classification (e.g., recognizing dogs vs. cats)\n",
|
||||
"* Sentiment analysis (e.g., determining if text is positive or negative)\n",
|
||||
"* Regression problems (e.g., predicting house prices based on features like number of bedrooms and square footage)\n",
|
||||
"\n",
|
||||
"**Unsupervised Learning**\n",
|
||||
"\n",
|
||||
"In unsupervised learning, you have an unlabeled dataset, and the goal is to discover patterns, relationships, or structure in the data without a specific target variable. This type of learning is often used for exploratory data analysis, feature selection, and dimensionality reduction.\n",
|
||||
"\n",
|
||||
"Here are some characteristics of unsupervised learning:\n",
|
||||
"\n",
|
||||
"1. Unlabeled training data: You have a dataset with only input features (no output labels).\n",
|
||||
"2. No specific goal: You want to find interesting patterns or structure in the data.\n",
|
||||
"3. Model evaluation: You evaluate the performance of your model using metrics like silhouette score, Calinski-Harabasz index, etc.\n",
|
||||
"\n",
|
||||
"Examples of unsupervised learning tasks include:\n",
|
||||
"\n",
|
||||
"* Clustering (e.g., grouping customers based on their purchase history)\n",
|
||||
"* Dimensionality reduction (e.g., reducing the number of features in a dataset while preserving important information)\n",
|
||||
"* Anomaly detection (e.g., identifying unusual behavior or outliers in financial transactions)\n",
|
||||
"\n",
|
||||
"In summary, supervised learning involves training a model to make predictions based on labeled data, whereas unsupervised learning aims to discover patterns and relationships in unlabeled data."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"================================================================================\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Test the tool with a technical question\n",
|
||||
"technical_question = \"What is the difference between supervised and unsupervised machine learning?\"\n",
|
||||
"technical_qa_tool(technical_question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0a976ce1",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b0a539e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Interactive version - uncomment to use\n",
|
||||
"# user_question = input(\"Enter your technical question: \")\n",
|
||||
"# technical_qa_tool(user_question)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
216
week1/community-contributions/youtube_video_summarize.ipynb
Normal file
216
week1/community-contributions/youtube_video_summarize.ipynb
Normal file
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "8ca2e60d-17c0-40fc-91c6-c16915b39c06",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re, html, json\n",
|
||||
"import requests\n",
|
||||
"from urllib.error import HTTPError\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable\n",
|
||||
"\n",
|
||||
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
||||
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
|
||||
"MODEL = \"llama3.2\"\n",
|
||||
"api_key='ollama'\n",
|
||||
"\n",
|
||||
"def yt_title_desc_transcript(url: str, lang=\"en\"):\n",
|
||||
" \"\"\"\n",
|
||||
" Returns {\"title\": str|None, \"description\": str|None, \"transcript\": str|None}.\n",
|
||||
" - Title via oEmbed (no API key).\n",
|
||||
" - Description scraped from the watch page (shortDescription).\n",
|
||||
" - Transcript via youtube-transcript-api, gracefully handling 400/disabled.\n",
|
||||
" \"\"\"\n",
|
||||
" # --- extract 11-char video id ---\n",
|
||||
" m = re.search(r\"(?:v=|/)([0-9A-Za-z_-]{11})|^([0-9A-Za-z_-]{11})$\", url)\n",
|
||||
" vid = (m.group(1) or m.group(2)) if m else None\n",
|
||||
" if not vid:\n",
|
||||
" return {\"title\": None, \"description\": None, \"transcript\": None}\n",
|
||||
"\n",
|
||||
" # --- title via oEmbed (very robust) ---\n",
|
||||
" title = None\n",
|
||||
" try:\n",
|
||||
" r = requests.get(\"https://www.youtube.com/oembed\",\n",
|
||||
" params={\"url\": f\"https://www.youtube.com/watch?v={vid}\", \"format\": \"json\"},\n",
|
||||
" timeout=10)\n",
|
||||
" if r.ok:\n",
|
||||
" title = r.json().get(\"title\")\n",
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" # --- description from watch page (shortDescription in initial JSON) ---\n",
|
||||
" description = None\n",
|
||||
" try:\n",
|
||||
" page = requests.get(f\"https://www.youtube.com/watch?v={vid}\", timeout=10).text\n",
|
||||
" # Look for ytInitialPlayerResponse JSON\n",
|
||||
" jmatch = re.search(r\"ytInitialPlayerResponse\\s*=\\s*({.*?});\", page, re.DOTALL)\n",
|
||||
" if jmatch:\n",
|
||||
" data = json.loads(jmatch.group(1))\n",
|
||||
" desc = data.get(\"videoDetails\", {}).get(\"shortDescription\")\n",
|
||||
" if desc:\n",
|
||||
" description = html.unescape(desc)\n",
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" # --- transcript (handle 400 cleanly) ---\n",
|
||||
" transcript_text = None\n",
|
||||
" try:\n",
|
||||
" items = YouTubeTranscriptApi.get_transcript(vid, languages=[lang])\n",
|
||||
" transcript_text = \" \".join(ch[\"text\"].strip() for ch in items if ch.get(\"text\"))\n",
|
||||
" except (NoTranscriptFound, TranscriptsDisabled, VideoUnavailable, HTTPError):\n",
|
||||
" # HTTPError covers the \"HTTP Error 400: Bad Request\" case\n",
|
||||
" transcript_text = None\n",
|
||||
" except Exception:\n",
|
||||
" transcript_text = None\n",
|
||||
"\n",
|
||||
" return {\"title\": title, \"description\": description, \"transcript\": transcript_text}\n"
|
||||
]
|
||||
},
|
||||
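{
"cell_type": "code",
"execution_count": null,
"id": "inspect-video-metadata",
"metadata": {},
"outputs": [],
"source": [
"# Quick look at what the helper returns before wiring it into the summarizer\n",
"# (same video URL as the example at the end of this notebook)\n",
"meta = yt_title_desc_transcript('https://youtu.be/99NUJ1cLbBI?list=RDdJ6_aU6auZc')\n",
"print(meta['title'])\n",
"print((meta['description'] or '')[:300])"
]
},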
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "ad9be496-4e91-4562-90f3-54d11208da55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"system_prompt = '''\n",
|
||||
"You are an assistant that generates detailed yet concise summaries of YouTube videos.\n",
|
||||
"When the user provides a title and description of a YouTube video, your task is to write a coherent, engaging, and informative summary of around 500 words.\n",
|
||||
"The summary should:\n",
|
||||
"\n",
|
||||
"Capture the main themes and key points the video likely covers.\n",
|
||||
"\n",
|
||||
"Expand on the description logically, providing context and flow.\n",
|
||||
"\n",
|
||||
"Stay neutral, factual, and clear (no personal opinions).\n",
|
||||
"\n",
|
||||
"Be self-contained so it makes sense without needing to watch the video.\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "dd4be0bc-df1f-47e0-9e03-9b734117f80a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_prompt(title, description):\n",
|
||||
" prompt = '''Provide me the YouTube video title and description.\\n\n",
|
||||
" I will generate a clear, engaging, and concise summary of the video content in around 500 words,\\n\n",
|
||||
" highlighting the main ideas, key points, and important details.\\n'''\n",
|
||||
" prompt += f'here is the title : {title} \\n Description : {description} '\n",
|
||||
" return prompt"
|
||||
]
|
||||
},
|
||||
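{
"cell_type": "code",
"execution_count": null,
"id": "transcript-aware-prompt-sketch",
"metadata": {},
"outputs": [],
"source": [
"# The transcript fetched by yt_title_desc_transcript() isn't used in user_prompt() above.\n",
"# A minimal sketch of a variant that folds it in when available; the 8000-character cap is an\n",
"# assumption to keep the prompt comfortably inside llama3.2's context window.\n",
"def user_prompt_with_transcript(title, description, transcript, max_chars=8000):\n",
"    prompt = user_prompt(title, description)\n",
"    if transcript:\n",
"        prompt += f'\\nHere is the transcript (possibly truncated):\\n{transcript[:max_chars]}'\n",
"    return prompt"
]
},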
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "46896ad3-db1e-448a-8a03-036b9568c69f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_youtube(yt_url):\n",
|
||||
" ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
||||
" video_metadata = yt_title_desc_transcript(yt_url)\n",
|
||||
" stream = ollama.chat.completions.create(\n",
|
||||
" model=MODEL,\n",
|
||||
" messages = [\n",
|
||||
" {\"role\":\"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\":\"user\", \"content\": user_prompt(video_metadata['title'], video_metadata['description'])}\n",
|
||||
" ],\n",
|
||||
" stream=True\n",
|
||||
" \n",
|
||||
" )\n",
|
||||
" response = \"\"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
" for chunk in stream:\n",
|
||||
" response += chunk.choices[0].delta.content or ''\n",
|
||||
" response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(response), display_id=display_handle.display_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "b59f8773-c13e-4050-ad3c-b578d07ef5e7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"Here is a summary of the YouTube video:\n",
|
||||
"\n",
|
||||
"**Monta Re: A Baul-Inspired Tribute to the Mystic Guru Shankaracharya**\n",
|
||||
"\n",
|
||||
"The music video for \"Monta Re\" by Amit Trivedi, featuring Swanand Kirkire and Amitabh Bhattacharya, is a soulful tribute to the mystic guru Shankaracharya. Set in the Bengali folk music tradition, this song brings to life the ancient tales of Shankaracharya's spiritual journey.\n",
|
||||
"\n",
|
||||
"With elegant lyrics penned by Amitabh Bhattacharya, \"Monta Re\" transports listeners to the banks of the Ganges River, where Shankaracharya wandered in search of wisdom and inner peace. The song's haunting melodies and emotive vocals evoke a sense of longing and introspection, perfectly capturing the mystic guru's spiritual essence.\n",
|
||||
"\n",
|
||||
"The music video beautifully illustrates the baul-inspired style, with intricate traditional dance movements performed by a group of energetic dancers. The choreography seamlessly blends elements of Bengal's folk heritage with modern sensibilities, making the song an engaging watch for audience members interested in Indian classical music.\n",
|
||||
"\n",
|
||||
"**Music and Lyric Credit:**\n",
|
||||
"Amit Trivedi handles the music composition, ensuring that the melody complements the song's themes without overpowering them. Amitabh Bhattacharya takes credit for the lyrics, which tell stunning stories of Shankaracharya's spiritual adventures. The song features Swanand Kirkire and Amitabh Bhattacharya as vocalists, further enriching its emotional impact.\n",
|
||||
"\n",
|
||||
"**Relevance to Bengali Culture:**\n",
|
||||
"\"Monta Re\" is a heartwarming tribute to Bengal's rich cultural heritage. Inspired by the baul traditions of the region, this song honors Shankaracharya's life and spiritual journey without diminishing his significance in modern times. By showcasing these folk roots, \"Monta Re\" provides fans with an enriching sensory experience.\n",
|
||||
"\n",
|
||||
"You can listen to \"Monta Re\" along with other T-Series music videos released by Amit Trivedi at the links provided below:\n",
|
||||
"\n",
|
||||
"- Watch \"Ankahee\"\n",
|
||||
"- Check out \"Sawaar Loon\"\n",
|
||||
"- Explore \"Zinda Hoon\"\n",
|
||||
"\n",
|
||||
"Follow the official T-SERIES YouTube channel for an ever-growing variety of original music tracks!\n",
|
||||
"\n",
|
||||
"By embracing the richness of Bengali folk traditions, \"Monta Re\" embodies a musical reflection of Shankaracharya's extraordinary journey as both spiritual guide and symbol of timeless wisdom."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"stream_youtube('https://youtu.be/99NUJ1cLbBI?list=RDdJ6_aU6auZc')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "649287ca-aff8-4b59-91b7-731c007e83a7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||