Merge branch 'main' of github.com:ed-donner/llm_engineering
This commit is contained in:
@@ -0,0 +1,87 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b568f38-7a64-453d-a88c-2f132801a084",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import ollama\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
" \n",
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown.\"\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt\n",
|
||||
"\t\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]\n",
|
||||
"\t\n",
|
||||
"def summarize(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" response = ollama.chat(\n",
|
||||
" model = \"llama3.2\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response['message']['content']\n",
|
||||
"\t\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))\n",
|
||||
"\t\n",
|
||||
"display_summary(\"http://news.google.com/\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
76
week1/community-contributions/ag-w1d1-site-summary.py
Normal file
76
week1/community-contributions/ag-w1d1-site-summary.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import os
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from bs4 import BeautifulSoup
|
||||
from IPython.display import Markdown, display
|
||||
from openai import OpenAI
|
||||
|
||||
#Function to get API key for OpanAI from .env file
|
||||
def get_api_key():
|
||||
load_dotenv(override=True)
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
if not api_key:
|
||||
print("No API Key found")
|
||||
elif not api_key.startswith("sk-"):
|
||||
print("Invalid API Key. Should start with sk-")
|
||||
elif api_key.strip() != api_key:
|
||||
print("Remove leading and trailing spaces fron the key")
|
||||
else:
|
||||
print("API Key found and looks good!")
|
||||
return api_key
|
||||
|
||||
#load API key and OpenAI class
|
||||
api_key = get_api_key()
|
||||
openai = OpenAI()
|
||||
|
||||
#headers and class for website to summarize
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
|
||||
}
|
||||
class Website:
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
response = requests.get(url, headers=headers)
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
self.title = soup.title.string if soup.title else "No title found"
|
||||
for irrelevant in soup.body(["script", "style", "img", "input"]):
|
||||
irrelevant.decompose()
|
||||
self.text = soup.body.get_text(separator="\n", strip=True)
|
||||
|
||||
#define prompts
|
||||
system_prompt = "You are an assistant that analyzes the contents of a website \
|
||||
and provides a short summary, ignoring text that might be navigation related. \
|
||||
Respond in markdown."
|
||||
|
||||
def user_prompt_for(website):
|
||||
user_prompt = f"You are looking at a website titled {website.title}"
|
||||
user_prompt += "\nThe contents of this website is as follows; \
|
||||
please provide a short summary of this website in markdown. \
|
||||
If it includes news or announcements, then summarize these too.\n\n"
|
||||
user_prompt += website.text
|
||||
return user_prompt
|
||||
|
||||
#prepare message for use in OpenAI call
|
||||
def messages_for(website):
|
||||
return [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt_for(website)}
|
||||
]
|
||||
|
||||
#define function to summarize a given website
|
||||
def summarize(url):
|
||||
website = Website(url)
|
||||
response = openai.chat.completions.create(
|
||||
model = "gpt-4o-mini",
|
||||
messages = messages_for(website)
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
|
||||
#function to display summary in markdown format
|
||||
def display_summary(url):
|
||||
summary = summarize(url)
|
||||
display(Markdown(summary))
|
||||
print(summary)
|
||||
|
||||
url = "https://edwarddonner.com"
|
||||
display_summary(url)
|
||||
BIN
week1/community-contributions/datasheets/part_new.pdf
Normal file
BIN
week1/community-contributions/datasheets/part_new.pdf
Normal file
Binary file not shown.
BIN
week1/community-contributions/datasheets/part_old.pdf
Normal file
BIN
week1/community-contributions/datasheets/part_old.pdf
Normal file
Binary file not shown.
233
week1/community-contributions/day-1-travel-recommendation.ipynb
Normal file
233
week1/community-contributions/day-1-travel-recommendation.ipynb
Normal file
@@ -0,0 +1,233 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "50ed5733",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a3b173a9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"API key found and looks good so far!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "191c7214",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "50adea39",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"Generate a detailed travel recommendation.Include the following information: \\\n",
|
||||
" 1.**Overview**: A brief introduction to the destination, highlighting its unique characteristics and appeal.\\\n",
|
||||
" 2.**Cost Breakdown**: - Average cost of accommodation (budget, mid-range, luxury options).\\\n",
|
||||
" - Estimated daily expenses (food, transportation, activities).\\\n",
|
||||
" - Total estimated cost for a typical 5-day trip for a solo traveler and a family of four.\\\n",
|
||||
" 3.**Best Time to Visit**: \\\n",
|
||||
" - Identify the peak, shoulder, and off-peak seasons.\\\n",
|
||||
" - Highlight the pros and cons of visiting during each season, including weather conditions and local events.\\\n",
|
||||
" 4.**Hidden Gems**: - List at least five lesser-known attractions or experiences that are must-sees.\\\n",
|
||||
" - Provide a brief description of each hidden gem, including why it is special and any tips for visiting.\\\n",
|
||||
" 5.**Local Tips**: \\\n",
|
||||
" - Suggest local customs or etiquette that travelers should be aware of.\\\n",
|
||||
" - Recommend local dishes to try and where to find them.Make sure the recommendation is engaging and informative, appealing to a diverse range of travelers.\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "aaac13d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def messages_for(user_prompt):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt }\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def recommender():\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages_for(f\"Create a travel recommendation for couple in the Netherlands\")\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "efad902a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def display_result():\n",
|
||||
" recommendendation = recommender()\n",
|
||||
" display(Markdown(recommendendation))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "5564c22c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### Travel Recommendation: Exploring the Netherlands as a Couple\n",
|
||||
"\n",
|
||||
"#### Overview \n",
|
||||
"The Netherlands, with its charming canals, stunning tulip fields, and vibrant cities, is a romantic destination for couples seeking beauty and culture. Whether you're wandering hand-in-hand through the cobbled streets of Amsterdam, enjoying a serene boat ride in Giethoorn, or indulging in local delicacies at quaint cafés, the Netherlands combines history, art, and picturesque landscapes in a unique blend. The country not only boasts iconic landmarks such as the Rijksmuseum and the Anne Frank House but also an extensive network of cycling paths that allow you to discover hidden treasures together.\n",
|
||||
"\n",
|
||||
"#### Cost Breakdown \n",
|
||||
"- **Accommodation**:\n",
|
||||
" - **Budget**: €50-€100 per night (Hostels like Stayokay Amsterdam Stadsdoelen)\n",
|
||||
" - **Mid-Range**: €100-€200 per night (Hotels such as Hotel Estheréa in Amsterdam)\n",
|
||||
" - **Luxury**: €200-€500+ per night (Luxury options like The Dylan in Amsterdam)\n",
|
||||
"\n",
|
||||
"- **Estimated Daily Expenses**:\n",
|
||||
" - **Food**: €30-€70 per person (Cafés and local restaurants)\n",
|
||||
" - **Transportation**: €10-€20 per person (Train or bike rental)\n",
|
||||
" - **Activities**: €15-€50 per person (Entry to museums, parks, and attractions)\n",
|
||||
" \n",
|
||||
"- **Total Estimated Cost**:\n",
|
||||
" - **Solo Traveler (5-day trip)**: Approx. €500-€1,250\n",
|
||||
" - Accommodation: €250-€1,000\n",
|
||||
" - Daily expenses: €250-€500\n",
|
||||
" \n",
|
||||
" - **Family of Four (5-day trip)**: Approx. €1,800-€3,500\n",
|
||||
" - Accommodation: €500-€1,500\n",
|
||||
" - Daily expenses: €1,300-€2,000\n",
|
||||
"\n",
|
||||
"#### Best Time to Visit\n",
|
||||
"- **Peak Season (June-August)**:\n",
|
||||
" - **Pros**: Warm weather, lively festivals, and vibrant outdoor activities.\n",
|
||||
" - **Cons**: Crowded tourist spots and higher prices.\n",
|
||||
" \n",
|
||||
"- **Shoulder Season (April-May & September-October)**:\n",
|
||||
" - **Pros**: Mild weather, stunning tulip blooms (April), fewer crowds, and lower prices.\n",
|
||||
" - **Cons**: Possible rain and some attractions may have reduced hours.\n",
|
||||
" \n",
|
||||
"- **Off-Peak Season (November-March)**:\n",
|
||||
" - **Pros**: Lower prices, festive holiday vibes, fewer tourists.\n",
|
||||
" - **Cons**: Cold and wet weather which might limit outdoor activities.\n",
|
||||
"\n",
|
||||
"#### Hidden Gems\n",
|
||||
"1. **Giethoorn**: Often called the \"Venice of the North,\" Giethoorn is a picturesque village without roads. Rent a \"whisper boat\" for a serene experience gliding through the canals and enjoy the quaint thatched-roof houses.\n",
|
||||
"\n",
|
||||
"2. **Zaanse Schans**: Located near Amsterdam, this charming neighborhood showcases traditional Dutch windmills, wooden houses, and artisan workshops. Spend a day wandering and even tour a functioning windmill.\n",
|
||||
"\n",
|
||||
"3. **Haarlem**: Only 15 minutes from Amsterdam, Haarlem is a historic city with stunning architecture, cozy cafés, and the impressive Frans Hals Museum that houses works from the Dutch Golden Age.\n",
|
||||
"\n",
|
||||
"4. **Edam**: Famous for its cheese, the lovely town of Edam invites you to taste samples at local markets and explore cobbled streets lined with historical buildings. Don’t miss the Edam Museum for a taste of local history.\n",
|
||||
"\n",
|
||||
"5. **Kinderdijk**: A UNESCO World Heritage site known for its iconic windmills, Kinderdijk offers a scenic bike ride and walking trails amidst the charming countryside. Visiting at sunset can be particularly romantic.\n",
|
||||
"\n",
|
||||
"#### Local Tips\n",
|
||||
"- **Customs and Etiquette**: The Dutch are known for being direct but polite. Keep conversations respectful and avoid raising your voice. It’s customary to greet people with a handshake or a friendly smile.\n",
|
||||
"\n",
|
||||
"- **Local Dishes to Try**: \n",
|
||||
" - **Stroopwafels**: A beloved Dutch treat; find them fresh from markets.\n",
|
||||
" - **Haring**: Raw herring fish served with onions and pickles; try it at local fish stalls in Amsterdam.\n",
|
||||
" - **Bitterballen**: A popular Dutch snack; pair them with a local beer at a cozy café.\n",
|
||||
" - **Poffertjes**: Small fluffy pancakes, perfect for sharing as a dessert or snack; find them at street vendors or markets.\n",
|
||||
" \n",
|
||||
"By choosing the Netherlands as your travel destination, you will immerse yourselves in a tapestry of art, history, and picturesque landscapes while creating unforgettable memories. Happy travels!"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"display_result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c66b461d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llms",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
132
week1/community-contributions/day1 email checker.ipynb
Normal file
132
week1/community-contributions/day1 email checker.ipynb
Normal file
@@ -0,0 +1,132 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82b3f7d7-a628-4824-b0b5-26c78b833b7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bb45eea-2ae0-4550-a9c8-fb42ff6a5f55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key looks good!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a10c24ce-c334-4424-8a2d-ae79ad3eb393",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# working on assumption that this is OK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9dff1ca-4e0a-44ca-acd6-0bc4004ffc3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"# As you can probably tell I am a University lecturer who deals with some dreadful assessment submissions and have to email students telling them why they got the marks they did.\n",
|
||||
"# This AI Assisstemt would help immensely as we could write what we want to say and let the reviewer fix it for us !\n",
|
||||
"# It is based on the day1_email_reviewer notebook\n",
|
||||
"# MV\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are an AI email reviewer that checks the content of emails sent out to higher education under graduate students. You must identify the meaning of the context in the text given, run a spell check in UK English and provide the subject line and email and rewrite to make the email more professional. and in the end of text, please provide the tone info.\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Dear John,\n",
|
||||
"You asked for the reasons why you received the marks that you did in your recently submitted assessment. I have looked over your submission again, bear in mind the fact that you are only 1 student out of a cohort of over 350 and have nagged me for a quick response, and your work was awful.\n",
|
||||
"You submitted work of an appalling standard and obvously did not actually put much work into your submission, you were givben the chance to have feedback on what you were going to submit but you could not be bothered to get this feedback.\n",
|
||||
"You did not bother to turn up to many of the lessons then submitted work with the most basic errors that anyone who had put the right level of effort into their studies would have been able to identify easily and not had such a low mark when they submitted. I think I put more work into marking this rubbish than you did in writing it.\n",
|
||||
"\n",
|
||||
"Best regards,\n",
|
||||
"Dr Doe\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\"role\":\"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\":\"user\", \"content\": user_prompt}\n",
|
||||
" \n",
|
||||
"] # fill this in\n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=messages\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"\n",
|
||||
"display(Markdown(response.choices[0].message.content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a65b65f-4b3f-41f5-894a-0f8e81f0ba27",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,234 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are a high profile professional resume analyst and assist users with highlighting gaps in a very formed resume and provide direction to make the resume eye catching to the recruiters \\\n",
|
||||
"and employers.\"\n",
|
||||
"\n",
|
||||
"user_prompt = \"\"\"Analyze the resume details to do the following: \\\n",
|
||||
"1. Assess the resume to highlight areas of improvement. \\ \n",
|
||||
"2. Create a well formed resume.\n",
|
||||
"\n",
|
||||
"Name: Sam Burns\n",
|
||||
"\n",
|
||||
"PROFESSIONAL SUMMARY\n",
|
||||
"Experienced Data and AI Architect with over 10 years of expertise designing scalable data platforms, integrating cloud-native solutions, and deploying AI/ML systems across enterprise environments. Proven track record of aligning data architecture with business strategy, leading cross-functional teams, and delivering high-impact AI-driven insights.\n",
|
||||
"\n",
|
||||
"CORE SKILLS\n",
|
||||
"\n",
|
||||
"Data Architecture: Lakehouse, Data Mesh, Delta Lake, Data Vault\n",
|
||||
"\n",
|
||||
"Cloud Platforms: Azure (Data Factory, Synapse, ML Studio), AWS (S3, Glue, SageMaker), Databricks\n",
|
||||
"\n",
|
||||
"Big Data & Streaming: Spark, Kafka, Hive, Hadoop\n",
|
||||
"\n",
|
||||
"ML/AI Tooling: MLflow, TensorFlow, Scikit-learn, Hugging Face Transformers\n",
|
||||
"\n",
|
||||
"Programming: Python, SQL, PySpark, Scala, Terraform\n",
|
||||
"\n",
|
||||
"DevOps: CI/CD (GitHub Actions, Azure DevOps), Docker, Kubernetes\n",
|
||||
"\n",
|
||||
"Governance: Data Lineage, Cataloging, RBAC, GDPR, Responsible AI\n",
|
||||
"\n",
|
||||
"PROFESSIONAL EXPERIENCE\n",
|
||||
"\n",
|
||||
"Senior Data & AI Architect\n",
|
||||
"ABC Tech Solutions — New York, NY\n",
|
||||
"Jan 2021 – Present\n",
|
||||
"\n",
|
||||
"Designed and implemented a company-wide lakehouse architecture on Databricks, integrating AWS S3, Redshift, and real-time ingestion from Kafka.\n",
|
||||
"\n",
|
||||
"Led architecture for a predictive maintenance platform using sensor data (IoT), Spark streaming, and MLflow-managed experiments.\n",
|
||||
"\n",
|
||||
"Developed enterprise ML governance framework ensuring reproducibility, fairness, and compliance with GDPR.\n",
|
||||
"\n",
|
||||
"Mentored 6 data engineers and ML engineers; led architectural reviews and technical roadmap planning.\n",
|
||||
"\n",
|
||||
"Data Architect / AI Specialist\n",
|
||||
"Global Insights Inc. — Boston, MA\n",
|
||||
"Jun 2017 – Dec 2020\n",
|
||||
"\n",
|
||||
"Modernized legacy data warehouse to Azure Synapse-based analytics platform, reducing ETL latency by 40%.\n",
|
||||
"\n",
|
||||
"Built MLOps pipelines for customer churn prediction models using Azure ML and ADF.\n",
|
||||
"\n",
|
||||
"Collaborated with business units to define semantic layers for self-service analytics in Power BI.\n",
|
||||
"\n",
|
||||
"Data Engineer\n",
|
||||
"NextGen Analytics — Remote\n",
|
||||
"Jul 2013 – May 2017\n",
|
||||
"\n",
|
||||
"Developed ETL pipelines in PySpark to transform raw web traffic into structured analytics dashboards.\n",
|
||||
"\n",
|
||||
"Integrated NLP models into customer support workflows using spaCy and early versions of Hugging Face.\n",
|
||||
"\n",
|
||||
"Contributed to open-source tools for Jupyter-based analytics and data catalog integration.\n",
|
||||
"\n",
|
||||
"EDUCATION\n",
|
||||
"M.S. in Computer Science – Carnegie Mellon University\n",
|
||||
"B.S. in Information Systems – Rutgers University\n",
|
||||
"\n",
|
||||
"CERTIFICATIONS\n",
|
||||
"\n",
|
||||
"Databricks Certified Data Engineer Professional\n",
|
||||
"\n",
|
||||
"Azure Solutions Architect Expert\n",
|
||||
"\n",
|
||||
"AWS Certified Machine Learning – Specialty\n",
|
||||
"\n",
|
||||
"PROJECTS & CONTRIBUTIONS\n",
|
||||
"\n",
|
||||
"llm_engineering (GitHub): Developed and maintained hands-on LLM course materials and community contributions framework.\n",
|
||||
"\n",
|
||||
"Real-time AI PoC: Designed Kafka-Spark pipeline with Azure OpenAI Service for anomaly detection on IoT streams.\n",
|
||||
"\n",
|
||||
"Contributor to Hugging Face Transformers – integration examples for inference pipelines\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e6a8730-c3ad-4243-a045-0acba2b5ebcf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21ed95c5-7001-47de-a36d-1d6673b403ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To give you a preview -- calling OpenAI with system and user messages:\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"display(Markdown(response.choices[0].message.content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sharing your code\n",
|
||||
"\n",
|
||||
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
|
||||
"\n",
|
||||
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
|
||||
"\n",
|
||||
"Here are good instructions courtesy of an AI friend: \n",
|
||||
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2c4ce468",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
||||
"\n",
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are a sports journalist.\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Write a sports article in less than 500 words describing the FIFA World Cup Final 2022.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"llama3.2\", messages=messages)\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llms",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
180
week1/community-contributions/day1_fitness_fun.ipynb
Normal file
180
week1/community-contributions/day1_fitness_fun.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "638074cc-212f-4d03-8518-ad6b3233d6ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Some Fitness Fun\n",
|
||||
"\n",
|
||||
"## Let's Get Pumped!\n",
|
||||
"\n",
|
||||
"Since I'm interested in fitness as well as software engineering, I decided to have a little fun with this\n",
|
||||
"based on an old SNL skit.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "15144b50-99e3-479f-8247-b79e0fcdba76",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"API key found and looks good so far!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n",
|
||||
"\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"# Hey Arnold, Time to Get Those \"Goals\" Sorted Out! 💪\n",
|
||||
"\n",
|
||||
"Well, well, well! Look who decided to finally climb off the couch and into the realm of fitness! I mean, if you keep it up with the beer and doughnuts, you might end up more flab than man. Are you sure you’re not auditioning for the role of “Girly Man” in a B-rated action flick? \n",
|
||||
"\n",
|
||||
"## Here’s the Game Plan: \n",
|
||||
"\n",
|
||||
"### 1. **Ditch the Doughnuts (and the beer, and the pasta...)**\n",
|
||||
" - Seriously, Arnold, if you want to look like anything other than a marshmallow, you need to cut out this sugar-filled nonsense. Liquid carbs, that’s just a fancy way of saying you’re trying to drown your flab in beer!\n",
|
||||
"\n",
|
||||
"### 2. **Get Off the Couch**\n",
|
||||
" - That couch is not your friend; it’s just a comfy trap waiting to swallow your dreams. Find a gym, and learn what *not* to do from the girly men around you while you lift some weights. Spoiler alert: they probably will lift more than you do!\n",
|
||||
"\n",
|
||||
"### 3. **Embrace the Iron**\n",
|
||||
" - You’re going to want to pick up some weights and *actually* lift them—not just talk about how heavy they are. Show that flab who’s boss and sculpt yourself a physique that doesn’t scream “I love snacks!”\n",
|
||||
"\n",
|
||||
"### 4. **Train Like You Mean It**\n",
|
||||
" - Start with a solid workout routine. Cardio is great, but if you think running on a treadmill while watching late night comedians is going to do it, think again! Train hard or go home, buddy!\n",
|
||||
"\n",
|
||||
"### 5. **Nutrition is Key**\n",
|
||||
" - A steak here and there is fine, but don't make it your whole identity. Toss in some vegetables, lean proteins, and *gasp* maybe squeeze in a salad! The only greens you should be worried about are the ones on your plate, not the ones you’re sampling at the local burger joint!\n",
|
||||
"\n",
|
||||
"### 6. **Set Real Goals**\n",
|
||||
" - Lastly, figure out what you actually want. Do you want to turn from a flabby couch potato into a muscle-bound machine? Or do you want to stay an eternal “girly man”? Because we can make you into a beast, but you’ve got to want it!\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"So, are you ready to say “hasta la vista” to your old lifestyle? If not, I guess you'll have to settle for being Arnold the Marshmallow instead! Let's get to work! 💪😎"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"You are Hans and Franz, two personal trainers from the 1980s who spend more time ridiculing people than actually helping them. \\\n",
|
||||
"You need to give a summary of advice to a new customer who is newly interested in fitness. Be snarky and be sure to mention flab and girly men.\\\n",
|
||||
"Respond in Markdown\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Hi guys, I'm Arnold and I need some help achieving some new fitness goals. I love beer, pasta, doughnuts, and a good steak.\n",
|
||||
" I also like sitting on the couch and watching late night comedy shows\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" { \"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" { \"role\": \"user\", \"content\": user_prompt}\n",
|
||||
"] \n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"\n",
|
||||
"raw_response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"response = raw_response.choices[0].message.content\n",
|
||||
"display(Markdown(response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5004ed3a-dd29-4a56-a182-dc531452a88a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
245
week1/community-contributions/day1_narrate_football_game.ipynb
Normal file
245
week1/community-contributions/day1_narrate_football_game.ipynb
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "31d3c4a4-5442-4074-b812-42d60e0a0c04",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:29.195103Z",
|
||||
"start_time": "2025-04-26T11:54:29.192394Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In this example we read a football (soccer) game's stats and we create a narration about the game as if we are running a podcast\n",
|
||||
"# use this website as an example: https://understat.com/match/27683"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cf45e9d5-4913-416c-9880-5be60a96c0e6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:30.218768Z",
|
||||
"start_time": "2025-04-26T11:54:30.215752Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "af8fea69-60aa-430c-a16c-8757b487e07a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:31.218616Z",
|
||||
"start_time": "2025-04-26T11:54:31.214154Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\n",
|
||||
" \"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\n",
|
||||
" \"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "daee94d2-f82b-43f0-95d1-15370eda1bc7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:32.216785Z",
|
||||
"start_time": "2025-04-26T11:54:32.183600Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"url = \"https://understat.com/match/27683\"\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0712dd1d-b6bc-41c6-84ec-d965f696f7aa",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:33.025841Z",
|
||||
"start_time": "2025-04-26T11:54:33.023289Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = (\"You are a football (soccer) analyst. You are used to reading stats of football \\\n",
|
||||
" games and extract relevant information. You are asked to be a podcast host and \\\n",
|
||||
" you need to create a narration of the game based on the stats you read and based \\\n",
|
||||
" on the play by play moves (the one with minutes upfront). You're talking to the \\\n",
|
||||
" general audience so try to use easy language and do not be too telegraphic\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70c972a6-8af6-4ff2-a338-6d7ba90e2045",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:33.730097Z",
|
||||
"start_time": "2025-04-26T11:54:33.725360Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ccc1ba81c76ffb9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:40.042357Z",
|
||||
"start_time": "2025-04-26T11:54:40.040384Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_user_prompt(game):\n",
|
||||
" user_prompt = f\"You are looking at {game.title} football game\"\n",
|
||||
" user_prompt += \"\\nThis is the entire webpage of the game \\\n",
|
||||
" Please provide a narration of the game in markdown. \\\n",
|
||||
" Focus only on what happened on the game and the stats and ignore all the standings and anything else.\\n\\n\"\n",
|
||||
" user_prompt += game.text\n",
|
||||
" return user_prompt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e729956758b4d7b5",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:40.699042Z",
|
||||
"start_time": "2025-04-26T11:54:40.696698Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82b71c1a-895a-48e7-a945-13e615bb0096",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:41.316244Z",
|
||||
"start_time": "2025-04-26T11:54:41.314110Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define messages with system_prompt and user_prompt\n",
|
||||
"def messages_for(system_prompt_input, user_prompt_input):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt_input},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_input}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "854dc42e-2bbd-493b-958f-c20484908300",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:55.239164Z",
|
||||
"start_time": "2025-04-26T11:54:41.987168Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API.\n",
|
||||
"game = Website(url)\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=messages_for(system_prompt, create_user_prompt(game))\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Response is provided in Markdown and displayed accordingly\n",
|
||||
"display(Markdown(response.choices[0].message.content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "758d2cbe-0f80-4572-8724-7cba77f701dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,499 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YOUR FIRST LAB\n",
|
||||
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
|
||||
"\n",
|
||||
"## Your first Frontier LLM Project\n",
|
||||
"\n",
|
||||
"Let's build a useful LLM solution - in a matter of minutes.\n",
|
||||
"\n",
|
||||
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
|
||||
"\n",
|
||||
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
|
||||
"\n",
|
||||
"Before starting, you should have completed the setup for [PC](../SETUP-PC.md) or [Mac](../SETUP-mac.md) and you hopefully launched this jupyter lab from within the project root directory, with your environment activated.\n",
|
||||
"\n",
|
||||
"## If you're new to Jupyter Lab\n",
|
||||
"\n",
|
||||
"Welcome to the wonderful world of Data Science experimentation! Once you've used Jupyter Lab, you'll wonder how you ever lived without it. Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. As you wish, you can add a cell with the + button in the toolbar, and print values of variables, or try out variations. \n",
|
||||
"\n",
|
||||
"I've written a notebook called [Guide to Jupyter](Guide%20to%20Jupyter.ipynb) to help you get more familiar with Jupyter Labs, including adding Markdown comments, using `!` to run shell commands, and `tqdm` to show progress.\n",
|
||||
"\n",
|
||||
"## If you're new to the Command Line\n",
|
||||
"\n",
|
||||
"Please see these excellent guides: [Command line on PC](https://chatgpt.com/share/67b0acea-ba38-8012-9c34-7a2541052665) and [Command line on Mac](https://chatgpt.com/canvas/shared/67b0b10c93a081918210723867525d2b). \n",
|
||||
"\n",
|
||||
"## If you'd prefer to work in IDEs\n",
|
||||
"\n",
|
||||
"If you're more comfortable in IDEs like VSCode or Pycharm, they both work great with these lab notebooks too. \n",
|
||||
"If you'd prefer to work in VSCode, [here](https://chatgpt.com/share/676f2e19-c228-8012-9911-6ca42f8ed766) are instructions from an AI friend on how to configure it for the course.\n",
|
||||
"\n",
|
||||
"## If you'd like to brush up your Python\n",
|
||||
"\n",
|
||||
"I've added a notebook called [Intermediate Python](Intermediate%20Python.ipynb) to get you up to speed. But you should give it a miss if you already have a good idea what this code does: \n",
|
||||
"`yield from {book.get(\"author\") for book in books if book.get(\"author\")}`\n",
|
||||
"\n",
|
||||
"## I am here to help\n",
|
||||
"\n",
|
||||
"If you have any problems at all, please do reach out. \n",
|
||||
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
|
||||
"And this is new to me, but I'm also trying out X/Twitter at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
|
||||
"\n",
|
||||
"## More troubleshooting\n",
|
||||
"\n",
|
||||
"Please see the [troubleshooting](troubleshooting.ipynb) notebook in this folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
|
||||
"\n",
|
||||
"## If this is old hat!\n",
|
||||
"\n",
|
||||
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress.\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
|
||||
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#f71;\">Treat these labs as a resource</h2>\n",
|
||||
" <span style=\"color:#f71;\">I push updates to the code regularly. When people ask questions or have problems, I incorporate it in the code, adding more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but in addition, I've added more steps and better explanations, and occasionally added new models like DeepSeek. Consider this like an interactive book that accompanies the lectures.\n",
|
||||
" </span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
|
||||
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Connecting to OpenAI (or Ollama)\n",
|
||||
"\n",
|
||||
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
|
||||
"\n",
|
||||
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
|
||||
"\n",
|
||||
"## Troubleshooting if you have problems:\n",
|
||||
"\n",
|
||||
"Head over to the [troubleshooting](troubleshooting.ipynb) notebook in this folder for step by step code to identify the root cause and fix it!\n",
|
||||
"\n",
|
||||
"If you make a change, try restarting the \"Kernel\" (the python process sitting behind this notebook) by Kernel menu >> Restart Kernel and Clear Outputs of All Cells. Then try this notebook again, starting at the top.\n",
|
||||
"\n",
|
||||
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
|
||||
"\n",
|
||||
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "019974d9-f3ad-4a8a-b5f9-0a3719aea2d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's make a quick call to a Frontier model to get started, as a preview!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business applications</h2>\n",
|
||||
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
|
||||
"\n",
|
||||
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
|
||||
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"You're an AI assistant who suggests subject line for the given email content \\\n",
|
||||
" by ignoring greetings, sign-offs, and other irrelevant text. You suggest 5 best subject lines, starting with best fitting\" \\\n",
|
||||
"\"\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Suggest 3 subject lines for the given email content in markdown. \\\n",
|
||||
" Give the fit percentage of each subject line as well. \\\n",
|
||||
" Give tone of the mail, action items, purpose of the mail.\\n\\n\"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [\"\"\"Dear Sir/Madam,\n",
|
||||
"\n",
|
||||
"I am Ankit Kumari, currently pursuing my Online MCA from Lovely Professional University. I am writing this email to express my concern regarding the scheduling of the online classes for the current semester.\n",
|
||||
"\n",
|
||||
"During the time of admission, it was conveyed to us that the online classes for the program would be conducted on weekends to ensure that working professionals like me can easily manage their work and studies. However, to my surprise, the classes for this semester have been scheduled on weekdays, which is not convenient for students who are working or have businesses.\n",
|
||||
"\n",
|
||||
"As a working professional, I find it difficult to balance my job responsibilities and attend the classes regularly on weekdays. Similarly, there are many students who are facing a similar issue. Therefore, I would like to request you to kindly reschedule the classes and conduct them on weekends as was initially promised during the admission process.\n",
|
||||
"\n",
|
||||
"I believe that conducting the classes on weekends would help students like me to balance their work and studies in a better way, and would also result in better attendance and improved learning outcomes.\n",
|
||||
"\n",
|
||||
"I hope that my request would be taken into consideration, and appropriate steps would be taken to ensure that the classes are conducted on weekends as promised during the admission process.\n",
|
||||
"\n",
|
||||
"Thank you for your understanding.\n",
|
||||
"\n",
|
||||
"Sincerely,\n",
|
||||
"\n",
|
||||
"Ankit Kumar \"\"\",\n",
|
||||
"\"\"\"Hi team,\n",
|
||||
"It is to inform you that I've studied computer science in my graduation, i.e. BSc Physical Science with Computer Science, but I'm still seeing bridge courses, i.e. ECAP010 and ACAP011, in my timetable.\n",
|
||||
"Therefore, I kindly request you to look into this matter.\n",
|
||||
"\n",
|
||||
"Best Regards\n",
|
||||
"Ankit Kumar\n",
|
||||
"\"\"\",] # fill this in\n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"\n",
|
||||
"responses = [openai.chat.completions.create(\n",
|
||||
" model=\"llama3.2\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt+message},\n",
|
||||
" ]\n",
|
||||
") for message in messages\n",
|
||||
"]\n",
|
||||
"# Step 4: print the result\n",
|
||||
"responses = [response.choices[0].message.content for response in responses]\n",
|
||||
"for response in responses:\n",
|
||||
" display(Markdown(response))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## An extra exercise for those who enjoy web scraping\n",
|
||||
"\n",
|
||||
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sharing your code\n",
|
||||
"\n",
|
||||
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
|
||||
"\n",
|
||||
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
|
||||
"\n",
|
||||
"Here are good instructions courtesy of an AI friend: \n",
|
||||
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "175ca116",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.chrome.options import Options\n",
|
||||
"from selenium.webdriver.chrome.service import Service\n",
|
||||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||||
"from selenium.webdriver.common.by import By\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"import platform\n",
|
||||
"\n",
|
||||
"class JSWebsite:\n",
|
||||
" def __init__(self, url, model=\"llama3.2\", headless=True, wait_time=5):\n",
|
||||
" \"\"\"\n",
|
||||
" @Param url: The URL of the website to scrape\n",
|
||||
" @Param model: The model to use for summarization. Valid values are \"gpt-4o-mini\" and \"llama3.2\"\n",
|
||||
" @Param headless: Whether to run the browser in headless mode\n",
|
||||
" @Param wait_time: Additional seconds to wait for JavaScript content to load\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" self.model = model\n",
|
||||
" self.wait_time = wait_time\n",
|
||||
" \n",
|
||||
" # Validate model choice\n",
|
||||
" assert model in [\"gpt-4o-mini\", \"llama3.2\"], f\"Invalid model: {model}. Valid models are 'gpt-4o-mini' and 'llama3.2'.\"\n",
|
||||
" \n",
|
||||
" # Initialize appropriate API client\n",
|
||||
" if \"gpt\" in model:\n",
|
||||
" self.openai = OpenAI()\n",
|
||||
" elif \"llama\" in model:\n",
|
||||
" self.openai = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")\n",
|
||||
" \n",
|
||||
" # Set up Chrome options with platform-specific settings\n",
|
||||
" options = Options()\n",
|
||||
" \n",
|
||||
" if headless:\n",
|
||||
" # Use appropriate headless setting based on platform\n",
|
||||
" if platform.system() == \"Darwin\": # macOS\n",
|
||||
" options.add_argument(\"--headless=chrome\") # macOS-friendly headless mode\n",
|
||||
" else:\n",
|
||||
" options.add_argument(\"--headless=new\") # Modern headless for other platforms\n",
|
||||
" \n",
|
||||
" # These settings help with headless JavaScript rendering\n",
|
||||
" options.add_argument(\"--disable-web-security\")\n",
|
||||
" options.add_argument(\"--allow-running-insecure-content\")\n",
|
||||
" options.add_argument(\"--disable-setuid-sandbox\")\n",
|
||||
" \n",
|
||||
" # Add a user agent to look more like a real browser\n",
|
||||
" # options.add_argument(\"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36\")\n",
|
||||
" # options.add_argument(\"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.7103.49 Safari/537.36\")\n",
|
||||
" options.add_argument(\"--user-agent=Mozilla/5.0 (Macintosh; Apple Silicon Mac OS X 14_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/136.0.7103.49 Safari/537.36\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" options.add_argument(\"--disable-gpu\")\n",
|
||||
" options.add_argument(\"--window-size=1920,1080\")\n",
|
||||
" options.add_argument(\"--disable-blink-features=AutomationControlled\")\n",
|
||||
" options.add_argument(\"--disable-infobars\")\n",
|
||||
" options.add_argument(\"--disable-extensions\")\n",
|
||||
" options.add_argument(\"--start-maximized\")\n",
|
||||
" options.add_argument(\"--no-sandbox\")\n",
|
||||
" options.add_argument(\"--disable-dev-shm-usage\")\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" # Initialize Chrome driver\n",
|
||||
" driver = webdriver.Chrome(options=options)\n",
|
||||
" driver.get(url)\n",
|
||||
" \n",
|
||||
" # Wait for the page to load\n",
|
||||
" WebDriverWait(driver, 10).until(\n",
|
||||
" EC.presence_of_element_located((By.TAG_NAME, \"body\"))\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Get the page source and close the browser\n",
|
||||
" html = driver.page_source\n",
|
||||
" driver.quit()\n",
|
||||
" \n",
|
||||
" # Parse HTML with BeautifulSoup\n",
|
||||
" soup = BeautifulSoup(html, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" \n",
|
||||
" # Remove irrelevant elements\n",
|
||||
" if soup.body:\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
" # Check if content is too short, which might indicate loading issues\n",
|
||||
" if len(self.text.strip()) < 100:\n",
|
||||
" self.has_content_error = True\n",
|
||||
" print(\"Warning: Page content seems too short or empty\")\n",
|
||||
" else:\n",
|
||||
" self.has_content_error = False\n",
|
||||
" else:\n",
|
||||
" self.text = \"No body content found\"\n",
|
||||
" self.has_content_error = True\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error processing page: {e}\")\n",
|
||||
" self.title = \"Error loading page\"\n",
|
||||
" self.text = f\"Failed to process page: {str(e)}\"\n",
|
||||
" self.has_content_error = True\n",
|
||||
"\n",
|
||||
" def summarize(self):\n",
|
||||
" \"\"\"Generate a summary of the website content using the specified AI model.\"\"\"\n",
|
||||
" # Check if page was loaded with errors\n",
|
||||
" if hasattr(self, 'has_content_error') and self.has_content_error:\n",
|
||||
" self.summary = \"Cannot summarize due to page loading or content errors.\"\n",
|
||||
" return self.summary\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = self.openai.chat.completions.create(\n",
|
||||
" model=self.model,\n",
|
||||
" messages=self.messages_for()\n",
|
||||
" )\n",
|
||||
" self.summary = response.choices[0].message.content\n",
|
||||
" return self.summary\n",
|
||||
" except Exception as e:\n",
|
||||
" self.summary = f\"Error generating summary: {str(e)}\"\n",
|
||||
" return self.summary\n",
|
||||
"\n",
|
||||
" def messages_for(self):\n",
|
||||
" \"\"\"Create the message structure for the AI model.\"\"\"\n",
|
||||
" self.system_prompt = (\n",
|
||||
" \"You are an assistant that analyzes the contents of a website \"\n",
|
||||
" \"and provides a short summary, ignoring text that might be navigation related. \"\n",
|
||||
" \"Respond in markdown.\"\n",
|
||||
" )\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": self.system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": self.user_prompt_for()}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" def display_summary(self):\n",
|
||||
" \"\"\"Display the summary in markdown format.\"\"\"\n",
|
||||
" if hasattr(self, 'summary'):\n",
|
||||
" display(Markdown(self.summary))\n",
|
||||
" else:\n",
|
||||
" print(\"Please run the summarize() method first.\")\n",
|
||||
"\n",
|
||||
" def user_prompt_for(self):\n",
|
||||
" \"\"\"Create the user prompt for the AI model.\"\"\"\n",
|
||||
" user_prompt = f\"You are looking at a website titled {self.title}\\n\"\n",
|
||||
" user_prompt += (\n",
|
||||
" \"The contents of this website is as follows; \"\n",
|
||||
" \"please provide a short summary of this website in markdown. \"\n",
|
||||
" \"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" )\n",
|
||||
" user_prompt += self.text\n",
|
||||
" return user_prompt\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Example usage\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" # Site to test\n",
|
||||
" site = JSWebsite(\"https://openai.com\", model=\"llama3.2\", headless=True, wait_time=15)\n",
|
||||
" \n",
|
||||
" # Only attempt to summarize if there were no content errors\n",
|
||||
" summary = site.summarize()\n",
|
||||
" \n",
|
||||
" # Display results\n",
|
||||
" if hasattr(site, 'has_content_error') and site.has_content_error:\n",
|
||||
" print(\"Skipped summarization due to page loading or content errors.\")\n",
|
||||
" print(\"Try with headless=False to see what's happening in the browser.\")\n",
|
||||
" else:\n",
|
||||
" site.display_summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "102d19b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llms",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
218
week1/community-contributions/day2_narrate_football_game.ipynb
Normal file
218
week1/community-contributions/day2_narrate_football_game.ipynb
Normal file
@@ -0,0 +1,218 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "31d3c4a4-5442-4074-b812-42d60e0a0c04",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:29.195103Z",
|
||||
"start_time": "2025-04-26T11:54:29.192394Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In this example we read a football (soccer) game's stats and create a narration of the game as if we were hosting a podcast\n",
|
||||
"# use this website as an example: https://understat.com/match/27683"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cf45e9d5-4913-416c-9880-5be60a96c0e6",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:30.218768Z",
|
||||
"start_time": "2025-04-26T11:54:30.215752Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "af8fea69-60aa-430c-a16c-8757b487e07a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:31.218616Z",
|
||||
"start_time": "2025-04-26T11:54:31.214154Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\n",
|
||||
" \"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\n",
|
||||
" \"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "daee94d2-f82b-43f0-95d1-15370eda1bc7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:32.216785Z",
|
||||
"start_time": "2025-04-26T11:54:32.183600Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"url = \"https://understat.com/match/27683\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0712dd1d-b6bc-41c6-84ec-d965f696f7aa",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:33.025841Z",
|
||||
"start_time": "2025-04-26T11:54:33.023289Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = (\"You are a football (soccer) analyst. You are used to reading stats of football \\\n",
|
||||
" games and extract relevant information. You are asked to be a podcast host and \\\n",
|
||||
" you need to create a narration of the game based on the stats you read and based \\\n",
|
||||
" on the play by play moves (the one with minutes upfront). You're talking to the \\\n",
|
||||
" general audience so try to use easy language and do not be too telegraphic\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "70c972a6-8af6-4ff2-a338-6d7ba90e2045",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:33.730097Z",
|
||||
"start_time": "2025-04-26T11:54:33.725360Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ccc1ba81c76ffb9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:40.042357Z",
|
||||
"start_time": "2025-04-26T11:54:40.040384Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_user_prompt(game):\n",
|
||||
" user_prompt = f\"You are looking at {game.title} football game\"\n",
|
||||
" user_prompt += \"\\nThis is the entire webpage of the game \\\n",
|
||||
" Please provide a narration of the game in markdown. \\\n",
|
||||
" Focus only on what happened on the game and the stats and ignore all the standings and anything else.\\n\\n\"\n",
|
||||
" user_prompt += game.text\n",
|
||||
" return user_prompt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82b71c1a-895a-48e7-a945-13e615bb0096",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:41.316244Z",
|
||||
"start_time": "2025-04-26T11:54:41.314110Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define messages with system_prompt and user_prompt\n",
|
||||
"def messages_for(system_prompt_input, user_prompt_input):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt_input},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_input}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "854dc42e-2bbd-493b-958f-c20484908300",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-26T11:54:55.239164Z",
|
||||
"start_time": "2025-04-26T11:54:41.987168Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API.\n",
|
||||
"game = Website(url)\n",
|
||||
"\n",
|
||||
"response = ollama.chat(model=\"llama3.2\", messages=messages_for(system_prompt, create_user_prompt(game)))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Response is provided in Markdown and displayed accordingly\n",
|
||||
"display(Markdown(response['message']['content']))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,433 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Constants\n",
|
||||
"\n",
|
||||
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
|
||||
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "dac0a679-599c-441f-9bf2-ddc73d35b940",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a messages list using the same format that we used for OpenAI\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"Describe some of the business applications of Generative AI\"}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7bb9c624-14f0-4945-a719-8ddb64f66f47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"payload = {\n",
|
||||
" \"model\": MODEL,\n",
|
||||
" \"messages\": messages,\n",
|
||||
" \"stream\": False\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "479ff514-e8bd-4985-a572-2ea28bb4fa40",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠋ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠙ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠹ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠸ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠼ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠴ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠦ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠧ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠇ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠏ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠋ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠙ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠹ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠸ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest ⠼ \u001b[K\u001b[?25h\u001b[?2026l\u001b[?2026h\u001b[?25l\u001b[1Gpulling manifest \u001b[K\n",
|
||||
"pulling dde5aa3fc5ff: 100% ▕██████████████████▏ 2.0 GB \u001b[K\n",
|
||||
"pulling 966de95ca8a6: 100% ▕██████████████████▏ 1.4 KB \u001b[K\n",
|
||||
"pulling fcc5a6bec9da: 100% ▕██████████████████▏ 7.7 KB \u001b[K\n",
|
||||
"pulling a70ff7e570d9: 100% ▕██████████████████▏ 6.0 KB \u001b[K\n",
|
||||
"pulling 56bb8bd477a5: 100% ▕██████████████████▏ 96 B \u001b[K\n",
|
||||
"pulling 34bb5ab01051: 100% ▕██████████████████▏ 561 B \u001b[K\n",
|
||||
"verifying sha256 digest \u001b[K\n",
|
||||
"writing manifest \u001b[K\n",
|
||||
"success \u001b[K\u001b[?25h\u001b[?2026l\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Let's just make sure the model is loaded\n",
|
||||
"\n",
|
||||
"!ollama pull llama3.2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "42b9f644-522d-4e05-a691-56e7658c0ea9",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generative AI has numerous business applications across various industries, including:\n",
|
||||
"\n",
|
||||
"1. **Content Creation**: Generative AI can be used to create high-quality content such as articles, social media posts, product descriptions, and more. This helps businesses save time and resources on content creation while maintaining consistency and quality.\n",
|
||||
"2. **Marketing Automation**: Generative AI can generate personalized marketing materials, such as email templates, ad copy, and product descriptions, based on customer data and behavior.\n",
|
||||
"3. **Customer Service Chatbots**: Generative AI-powered chatbots can provide 24/7 support to customers, answering common questions and routing complex issues to human representatives.\n",
|
||||
"4. **Product Design**: Generative AI can help designers create new products, such as 3D models, prototypes, and even entire product lines, using machine learning algorithms to optimize design parameters.\n",
|
||||
"5. **Virtual Assistants**: Generative AI-powered virtual assistants can be integrated into businesses' IT systems to automate tasks, provide personalized recommendations, and offer customer support.\n",
|
||||
"6. **Data Analysis**: Generative AI can help analyze large datasets, identify patterns, and make predictions about future trends and outcomes.\n",
|
||||
"7. **Supply Chain Optimization**: Generative AI can optimize supply chain operations by predicting demand, managing inventory, and optimizing logistics.\n",
|
||||
"8. **Sales Forecasting**: Generative AI can analyze historical sales data, market trends, and external factors to predict future sales performance and identify areas for improvement.\n",
|
||||
"9. **Creative Writing**: Generative AI can be used to generate creative content such as poetry, music, or even entire scripts for films and TV shows.\n",
|
||||
"10. **Music Generation**: Generative AI can create original music tracks, beats, or melodies based on user input or style preferences.\n",
|
||||
"11. **Image and Video Generation**: Generative AI can create realistic images and videos that can be used in various applications such as advertising, product photography, or even entertainment.\n",
|
||||
"12. **Language Translation**: Generative AI-powered language translation tools can help businesses communicate with customers and clients who speak different languages.\n",
|
||||
"\n",
|
||||
"Some notable companies that are leveraging Generative AI for business applications include:\n",
|
||||
"\n",
|
||||
"* Google (Google DeepMind)\n",
|
||||
"* Amazon (Amazon SageMaker)\n",
|
||||
"* Microsoft (Microsoft Azure Machine Learning)\n",
|
||||
"* IBM (IBM Watson Studio)\n",
|
||||
"* Salesforce (Salesforce Einstein)\n",
|
||||
"\n",
|
||||
"These applications of Generative AI can help businesses gain a competitive edge, improve efficiency, and enhance customer experiences.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If this doesn't work for any reason, try the 2 versions in the following cells\n",
|
||||
"# And double check the instructions in the 'Recap on installation of Ollama' at the top of this lab\n",
|
||||
"# And if none of that works - contact me!\n",
|
||||
"\n",
|
||||
"response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)\n",
|
||||
"print(response.json()['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a021f13-d6a1-4b96-8e18-4eae49d876fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Introducing the ollama package\n",
|
||||
"\n",
|
||||
"And now we'll do the same thing, but using the elegant ollama python package instead of a direct HTTP call.\n",
|
||||
"\n",
|
||||
"Under the hood, it's making the same call as above to the ollama server running at localhost:11434"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "7745b9c4-57dc-4867-9180-61fa5db55eb8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generative AI has numerous business applications across various industries. Here are some examples:\n",
|
||||
"\n",
|
||||
"1. **Content Creation**: Generative AI can be used to generate high-quality content such as articles, social media posts, product descriptions, and even entire books. This can help businesses reduce the time and cost associated with content creation.\n",
|
||||
"2. **Product Design**: Generative AI can be used to design new products, such as jewelry, fashion items, or household goods. This can help businesses quickly prototype and test new designs without the need for extensive human involvement.\n",
|
||||
"3. **Marketing and Advertising**: Generative AI can be used to generate personalized ads, product recommendations, and even entire marketing campaigns. This can help businesses tailor their marketing efforts to specific customer segments.\n",
|
||||
"4. **Customer Service Chatbots**: Generative AI can be used to create chatbots that can understand and respond to customer inquiries in a more human-like way. This can help businesses provide better customer service without the need for human agents.\n",
|
||||
"5. **Data Analysis and Visualization**: Generative AI can be used to analyze large datasets and generate visualizations, such as charts and graphs, that can help businesses gain insights into their data.\n",
|
||||
"6. **Predictive Maintenance**: Generative AI can be used to predict when equipment is likely to fail, allowing businesses to schedule maintenance and reduce downtime.\n",
|
||||
"7. **Personalized Recommendations**: Generative AI can be used to generate personalized product recommendations based on customer behavior and preferences.\n",
|
||||
"8. **Music Composition**: Generative AI can be used to compose music for various applications, such as film scores, advertisements, or even entire albums.\n",
|
||||
"9. **Image and Video Generation**: Generative AI can be used to generate high-quality images and videos that can be used in various business contexts, such as product photography or marketing materials.\n",
|
||||
"10. **Supply Chain Optimization**: Generative AI can be used to optimize supply chain operations, such as predicting demand, managing inventory, and identifying bottlenecks.\n",
|
||||
"\n",
|
||||
"Some specific industries where generative AI is being applied include:\n",
|
||||
"\n",
|
||||
"* **Finance**: Generative AI can be used to analyze financial data, generate investment recommendations, and even create personalized financial plans.\n",
|
||||
"* **Healthcare**: Generative AI can be used to analyze medical images, generate diagnostic reports, and even develop personalized treatment plans.\n",
|
||||
"* **Education**: Generative AI can be used to create personalized learning plans, generate educational content, and even grade student assignments.\n",
|
||||
"\n",
|
||||
"These are just a few examples of the many business applications of generative AI. As the technology continues to evolve, we can expect to see even more innovative uses across various industries.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import ollama\n",
|
||||
"\n",
|
||||
"response = ollama.chat(model=MODEL, messages=messages)\n",
|
||||
"print(response['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4704e10-f5fb-4c15-a935-f046c06fb13d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Alternative approach - using OpenAI python library to connect to Ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "23057e00-b6fc-4678-93a9-6b31cb704bff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generative AI has numerous business applications across various industries. Here are some examples:\n",
|
||||
"\n",
|
||||
"1. **Content Creation**: Generative AI can be used to automate content creation, such as generating news articles, product descriptions, and social media posts. This can help businesses save time and resources while maintaining consistency in their content.\n",
|
||||
"2. **Digital Marketing**: Generative AI can be used to optimize online ads, generate ad copy, and create personalized email campaigns. It can also help analyze customer data and predict their behavior, enabling more effective marketing strategies.\n",
|
||||
"3. **Design and Prototyping**: Generative AI can be used to generate designs for products, such as product labels, packaging, and branding materials. It can also create prototypes and simulations, reducing the need for physical prototyping and iterative design processes.\n",
|
||||
"4. **Creative Writing and Storytelling**: Generative AI can be used to co-create stories, articles, and blog posts with human writers, helping to generate ideas, outlines, and even entire pieces of content.\n",
|
||||
"5. **Music Composition and Generation**: Generative AI can be used to compose music, generate sound effects, and create personalized playlists. This can help businesses like music streaming services and content creators generate original content without having to rely on human composers.\n",
|
||||
"6. **Image and Video Generation**: Generative AI can be used to create high-quality images and videos for various applications, including advertising, media production, and film and television studios.\n",
|
||||
"7. **Predictive Analytics and Risk Analysis**: Generative AI can be used to analyze large datasets, identify patterns, and predict outcomes, helping businesses make informed decisions about investments, customers, products, and resource allocation.\n",
|
||||
"8. **Chatbots and Virtual Assistants**: Generative AI can be used to create conversational interfaces that simulate human-like interactions, making it easier for businesses to engage with their customers, provide customer support, and automate routine tasks.\n",
|
||||
"9. **Materials Science and Product Development**: Generative AI can be used to design new materials, predict material behavior, and optimize product performance, enabling faster and more accurate product development cycles.\n",
|
||||
"10. **Supply Chain Management and Logistics**: Generative AI can be used to analyze supply chain data, predict demand, and optimize logistics operations, helping businesses reduce costs, improve efficiency, and increase delivery times.\n",
|
||||
"\n",
|
||||
"These are just a few examples of the business applications of Generative AI. As the technology continues to evolve, we can expect to see even more innovative uses across various industries and sectors.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# There's actually an alternative approach that some people might prefer\n",
|
||||
"# You can use the OpenAI client python library to call Ollama:\n",
|
||||
"\n",
|
||||
"from openai import OpenAI\n",
|
||||
"ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
||||
"\n",
|
||||
"response = ollama_via_openai.chat.completions.create(\n",
|
||||
" model=MODEL,\n",
|
||||
" messages=messages\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NOW the exercise for you\n",
|
||||
"\n",
|
||||
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"# A class to represent a Webpage\n",
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the BeautifulSoup library\n",
|
||||
" \"\"\"\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
"\n",
|
||||
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.\"\n",
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
|
||||
"and provides a short summary, ignoring text that might be navigation related. \\\n",
|
||||
"Respond in markdown.\"\n",
|
||||
"\n",
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
|
||||
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
|
||||
"please provide a short summary of this website in markdown. \\\n",
|
||||
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
|
||||
" user_prompt += website.text\n",
|
||||
" return user_prompt\n",
|
||||
"\n",
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"# Call the Ollama local API.\n",
|
||||
"def summarize(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" response = ollama_via_openai.chat.completions.create(\n",
|
||||
" model = MODEL,\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
"\n",
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "16277650-7925-47dc-9194-02bbb520d691",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"This appears to be a sample issue of the CNN website, showcasing various news articles and features from around the world. I'll summarize some of the top headlines:\n",
|
||||
"\n",
|
||||
"**World News**\n",
|
||||
"\n",
|
||||
"* **Pope Francis**: The Pope has passed away at the age of 96, leaving behind a legacy of service and compassion.\n",
|
||||
"* **Israel-Hamas War**: The conflict between Israel and Hamas has intensified, with both sides suffering losses and a human cost.\n",
|
||||
"* **Ukraine-Russia War**: Russia has returned the body of a Ukrainian journalist who died in Russian detention, sparking concerns about Russian treatment of prisoners.\n",
|
||||
"\n",
|
||||
"**US Politics**\n",
|
||||
"\n",
|
||||
"* **Trump Administration**: Former President Donald Trump is rumored to be planning a comeback, with several high-profile officials announcing their resignation or departures.\n",
|
||||
"* **TSAFIR ABAVOV**: Two Israeli officials have been accused of attempting to purchase the remains of two dead Palestinian men for thousands of dollars.\n",
|
||||
"\n",
|
||||
"**Business and Technology**\n",
|
||||
"\n",
|
||||
"* **Apple Tariffs**: The US government has imposed tariffs on Chinese tech giant Apple, with CEO Tim Cook stating that the tariffs could cost the company up to $900 million this quarter.\n",
|
||||
"* **Meta's AI Assistant App**: Facebook parent Meta has launched an AI assistant app, further competing with OpenAI and Google in the emerging field of digital assistants.\n",
|
||||
"\n",
|
||||
"**Health**\n",
|
||||
"\n",
|
||||
"* **Whooping Cough Outbreak**: Cases of whooping cough are rising globally, with experts warning of the need for increased vaccination efforts.\n",
|
||||
"* **Forever Chemicals Research**: Researchers have made gains in understanding how to build homes using fungi as a sustainable alternative material solution.\n",
|
||||
"\n",
|
||||
"This is just a snapshot of some of the top news headlines from the CNN website. If you'd like to know more about any specific topic, feel free to ask!"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"display_summary(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "86fd552d-d95c-4636-878c-86d3f6338a0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"**Anthropic Website Summary**\n",
|
||||
"==========================\n",
|
||||
"\n",
|
||||
"Anthropic is a company that builds AI to serve humanity's long-term well-being. They aim to create tools with human benefit at their foundation, focusing on responsible AI development.\n",
|
||||
"\n",
|
||||
"### News and Announcements\n",
|
||||
"\n",
|
||||
"* **Claude 3.7 Sonnet**: Anthropic's most intelligent AI model is now available.\n",
|
||||
"\t+ Released in February 2025\n",
|
||||
"* **Anthropic Economic Index**: New publication released on March 27, 2025, discussing societal impacts of large language models.\n",
|
||||
"* **Alignment faking in large language models**: Blog post from December 18, 2024, exploring alignment science.\n",
|
||||
"* **Introducing the Model Context Protocol**: Product update for November 25, 2024.\n",
|
||||
"\n",
|
||||
"### AI Research and Products\n",
|
||||
"\n",
|
||||
"Anthropic focuses on building powerful technologies with human benefit at their foundation. They provide various resources, including:\n",
|
||||
"\n",
|
||||
"* Claude, an open-source AI platform\n",
|
||||
"* Anthropic Academy, a learning platform\n",
|
||||
"* Research overview, featuring the Anthropic Economic Index and more\n",
|
||||
"\n",
|
||||
"The company's mission is to create tools that put safety at the frontier of AI development.\n",
|
||||
"\n",
|
||||
"### Products and Pricing\n",
|
||||
"\n",
|
||||
"Anthropic offers various products and pricing plans for customers, including:\n",
|
||||
"\n",
|
||||
"* Claude Code\n",
|
||||
"* Claude team plan\n",
|
||||
"* Claude enterprise plan\n",
|
||||
"* Claude education plan\n",
|
||||
"* Claude apps\n",
|
||||
"* Pricing plans for Claude.ai"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"display_summary(\"https://anthropic.com\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,225 @@
|
||||
import dotenv
|
||||
import asyncio
|
||||
|
||||
import os
|
||||
os.environ['PYPPETEER_CHROMIUM_REVISION'] = '1263111'
|
||||
|
||||
from rich.console import Console
|
||||
from rich.markdown import Markdown
|
||||
from openai import OpenAI
|
||||
from openai.types.chat import ChatCompletion
|
||||
from typing import Optional, Union, Dict, List
|
||||
from pyppeteer import launch
|
||||
from pyppeteer_stealth import stealth
|
||||
from random import randint
|
||||
|
||||
console = Console()
|
||||
|
||||
class Config:
    """Typed accessor over a .env file.

    Loads the file into the process environment and keeps a private copy of
    the parsed key/value pairs so lookups do not depend on os.environ.
    """

    def __init__(self, filename: str = ".env"):
        # load_dotenv exports the values; dotenv_values gives us a local dict
        # snapshot of exactly what the file contained.
        dotenv.load_dotenv(filename)
        self._config = dotenv.dotenv_values(filename)

    def get(self, key: str) -> Optional[str]:
        """Return the raw string value for `key`, or None if absent."""
        return self._config.get(key, None)

    def get_int(self, key: str) -> Optional[int]:
        """Return the value parsed as an int, or None if the key is absent.

        Raises ValueError if the value exists but is not a valid integer.
        """
        value = self.get(key)
        if value is not None:
            return int(value)
        return None

    def get_bool(self, key: str) -> Optional[bool]:
        """Return True for 'true'/'1'/'yes' (case-insensitive), False for any
        other present value, or None if the key is absent."""
        value = self.get(key)
        if value is not None:
            return value.lower() in ("true", "1", "yes")
        return None

    @property
    def openai_api_key(self) -> Optional[str]:
        """The OPENAI_API_KEY entry from the .env file (None if not set)."""
        return self.get("OPENAI_API_KEY")
|
||||
|
||||
class Website:
    """A web page scraped with a headless (stealth) pyppeteer browser.

    Assigning to `url` triggers a scrape; `title` and `text` expose the
    results. On scrape failure the properties fall back to placeholder
    values instead of being left unset (previously a failed scrape left
    the attributes missing, so reading `title`/`text` raised AttributeError).
    """

    __url: str
    __title: str
    __text: str

    @property
    def url(self) -> str:
        return self.__url

    @property
    def title(self) -> str:
        return self.__title

    @property
    def text(self) -> str:
        return self.__text

    @url.setter
    def url(self, url: str) -> None:
        # Setting a new URL immediately re-scrapes the page.
        self.__url = url
        self.__scrape()

    def __scrape(self) -> None:
        """
        Scrape the website using pyppeteer, populating __title and __text.
        """
        # Safe fallbacks in case the navigation/extraction below fails.
        self.__title = "No title found"
        self.__text = ""

        async def main() -> None:
            browser = await launch(headless=True)
            page = await browser.newPage()
            await stealth(page)

            # Randomize the user agent to reduce the chance of bot blocking.
            user_agents: List[str] = [
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15",
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
            ]
            ua = user_agents[randint(0, len(user_agents) - 1)]
            await page.setUserAgent(ua)
            # Abort stylesheet requests to speed up page loads; everything
            # else is allowed through.
            await page.setRequestInterception(True)
            page.on("request", lambda req: asyncio.ensure_future(
                req.abort() if req.resourceType == "stylesheet" else req.continue_()
            ))

            try:
                await page.goto(self.url, {"timeout": 60000})
                self.__title = await page.title()
                self.__text = await page.evaluate('() => document.body.innerText')
            except Exception as e:
                # Fixed rich markup: closing tag is [/red], not [red].
                console.print(f"[red]Error scraping {self.url}: {e}[/red]")
            finally:
                await page.close()
                await browser.close()

        # asyncio is imported at module level; the redundant in-method
        # import was removed. NOTE(review): asyncio.run fails if an event
        # loop is already running (e.g. inside Jupyter) — confirm intended
        # usage is plain-script only.
        asyncio.run(main())

    def __init__(self, url: str) -> None:
        self.url = url

    def __str__(self) -> str:
        return f"Website(url={self.url}, title=\"{self.title}\")"
|
||||
|
||||
class LlmSummarizer:
    """Summarizes websites with the OpenAI chat API.

    The OpenAI client and the system message are built lazily on first use.
    """

    #region Config
    __config: Config

    @property
    def config(self) -> Config:
        if self.__config is None:
            raise ValueError("Config not initialized")
        return self.__config
    #endregion

    #region OpenAI
    __openai: OpenAI = None

    @property
    def openai(self) -> OpenAI:
        """
        Lazy load the OpenAI client. This is done to avoid creating the client if it's not needed.
        """
        if self.__openai is None:
            self.__openai = OpenAI(api_key=self.config.openai_api_key)
        return self.__openai

    #endregion

    #region System behavior
    __system_behavior: Dict[str, str] = None

    @property
    def system_behavior(self) -> Dict[str, str]:
        """
        Lazy load the system behavior. This is done to avoid creating the system behavior if it's not needed.
        """
        if self.__system_behavior is None:
            self.__system_behavior = {
                "role": "system",
                "content": (
                    # Fixed missing separators between the concatenated
                    # fragments ("...navigation-related.Respond..." became
                    # two sentences).
                    "You are an assistant that analyzes the contents of a website "
                    "and provides a short summary, ignoring the text that might be navigation-related. "
                    "Respond in markdown and be concise."
                )
            }
        return self.__system_behavior

    #endregion

    #region user_prompt_for

    def user_prompt_for(self, website: Website) -> Dict[str, str]:
        """Build the user message asking for a markdown summary of `website`."""
        # Fixed missing separators: the title quote ran straight into "The
        # content..." and "markdown." ran into "If it includes...".
        user_prompt_content: str = (
            f"You are looking at the website titled \"{website.title}\".\n"
            "The content of this website is as follows; "
            "please provide a short summary of this website in markdown. "
            "If it includes news or announcements, then summarize these too.\n\n"
            f"\"\"\"\n{website.text}\n\"\"\"\n\n"
        )
        return {
            "role": "user",
            "content": user_prompt_content
        }

    #endregion

    #region messages_for

    def messages_for(self, website: Website) -> List[Dict[str, str]]:
        """
        Create the messages for the OpenAI API.
        """
        return [
            self.system_behavior,
            self.user_prompt_for(website)
        ]

    #endregion

    #region summarize

    def summarize(self, website: Union[Website, str]) -> Optional[str]:
        """
        Summarize the website using the OpenAI API.

        Accepts a Website or a URL string; returns the summary text, or
        None if the API call fails.
        """
        if isinstance(website, str):
            website = Website(website)
        messages: List[Dict[str, str]] = self.messages_for(website)
        try:
            response: ChatCompletion = self.openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.2,
                max_tokens=512,
            )
            return response.choices[0].message.content
        except Exception as e:
            # `website` is always a Website here (converted above), so the
            # old isinstance check was dead code. Also fixed the rich
            # closing tag ([/red]).
            console.print(f"[red]Error summarizing {website.url}: {e}[/red]")
            return None

    #endregion

    def __init__(self, config: Config) -> None:
        self.__config = config
|
||||
|
||||
def display_markdown(content: str) -> None:
    """
    Render the given markdown string to the terminal via rich.
    """
    rendered = Markdown(content)
    console.print(rendered)
|
||||
|
||||
def show_summary(summary: str) -> None:
    """
    Render a website summary as markdown, or print a notice when empty.
    """
    if not summary:
        console.print("No summary found.")
        return
    display_markdown(summary)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: load config from .env, summarize a sample site,
    # and render the result as rich markdown.
    summarizer = LlmSummarizer(Config())
    summary = summarizer.summarize("https://cnn.com")
    show_summary(summary)
|
||||
@@ -0,0 +1,7 @@
|
||||
beautifulsoup4
|
||||
openai
|
||||
python-dotenv
|
||||
requests
|
||||
rich
|
||||
pyppeteer
|
||||
pyppeteer_stealth
|
||||
@@ -0,0 +1,302 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "52629582-ec22-447a-ae09-cba16a46976d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Datasheet Comparator - MVP"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "40de9dc5-0387-4950-8f8f-4805b46187c3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook is part of a project that compares technical specifications from two electronic component datasheets.\n",
|
||||
"\n",
|
||||
"Initially, the PDFs are provided as local files, but future versions will allow users to:\n",
|
||||
"- Select datasheets interactively from within the notebook\n",
|
||||
"- Search and retrieve part information from distributor APIs (e.g. Mouser, Digi-Key)\n",
|
||||
"- Use AI to extract, analyze, and summarize key specifications and differences\n",
|
||||
"\n",
|
||||
"The goal is to support engineers in identifying part changes, upgrades, or replacements efficiently."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b51c91b6-953b-479c-acc5-ab2a189fabba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 📌 Section A: Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "553666d5-af7e-46f0-b945-0d48c32bfbbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import fitz # PyMuPDF for PDF parsing\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a19c077a-36e3-4ff2-bee7-85f23e90b89a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Load OpenAI API key from environment variable (recommended)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6435f1c7-f161-4cad-b68a-05080304ff22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv(\"OPENAI_API_KEY\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3722da9c-e1e9-4838-8ab9-04e45e52d8f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "34916ec4-643c-4b76-8e21-13c3364782fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Define paths to datasheets\n",
|
||||
"💬 **Note:** These example datasheet paths will later be replaced by a user-driven file selection dialog within the Jupyter notebook; optionally, this section could be extended to fetch component data directly from distributor websites."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42621aa4-7094-4209-95ba-ecf03ba609fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pdf_path_1 = \"./datasheets/part_old.pdf\"\n",
|
||||
"pdf_path_2 = \"./datasheets/part_new.pdf\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f09e201-ab22-4b9d-a9a3-b12cc671a68a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 📌 Section B: Extract text from datasheets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff36d62e-efb6-4d08-a1d5-ceb470917103",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_text_from_pdf(path):\n",
|
||||
" text = \"\"\n",
|
||||
" with fitz.open(path) as doc:\n",
|
||||
" for page in doc:\n",
|
||||
" text += page.get_text()\n",
|
||||
" return text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0da6bc72-93e2-4229-885b-7020f3920855",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 📌 Section C: Use ChatGPT to summarize and compare"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4e8de5f9-c2b6-4d6f-9cde-c1275ec0be83",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Section C.1: Define system_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30dc6c3a-d7a1-4837-9d57-00c4b2d63092",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"You are a technical assistant helping to compare electronic component datasheets.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5bf19f66-89f2-4fbf-b5d6-ff1f8e06ba6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Section C.2: Define user_prompt, summarize and compare"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ff4e362-11d4-4737-a10e-1953ac0eac55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def summarize_datasheet(text, part_name, system_prompt):\n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" Summarize the most important technical characteristics of the electronic component '{part_name}' based on this datasheet text:\n",
|
||||
" ---\n",
|
||||
" {text}\n",
|
||||
" ---\n",
|
||||
" Give a structured list of properties like voltage, current, dimensions, operating temperature, etc.\n",
|
||||
" \"\"\"\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
" \n",
|
||||
"def compare_parts(text1, text2, system_prompt):\n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" Compare the following two summaries of electronic components and evaluate whether the second part is a valid replacement for the first one.\n",
|
||||
" Identify any differences in electrical specs, mechanical dimensions, and compliance with medical device requirements.\n",
|
||||
" Suggest what changes would be required to use the second part in place of the first (e.g., schematic/layout changes).\n",
|
||||
" \n",
|
||||
" Old Part Summary:\n",
|
||||
" {text1}\n",
|
||||
"\n",
|
||||
" New Part Summary:\n",
|
||||
" {text2}\n",
|
||||
"\n",
|
||||
" Provide a table of differences and a short final recommendation.\n",
|
||||
" \"\"\"\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92524623-b1f9-4b55-9056-d02c41457df4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 📌 Section D: Put it all together and print it nicely."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ebd172eb-a8fb-4308-95c7-fee8f3f250ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def display_summary_and_compare(part1, part2, system_prompt):\n",
|
||||
" content1 = extract_text_from_pdf(part1)\n",
|
||||
" content2 = extract_text_from_pdf(part2)\n",
|
||||
" summary1 = summarize_datasheet(content1, \"Old Part\", system_prompt)\n",
|
||||
" summary2 = summarize_datasheet(content2, \"New Part\", system_prompt)\n",
|
||||
" compare = compare_parts(summary1, summary2, system_prompt)\n",
|
||||
" report = summary1 + summary2 + compare\n",
|
||||
" display(Markdown(report))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ab2f1cfb-7e7b-429d-9f53-68524f93afbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary_and_compare(pdf_path_1, pdf_path_2, system_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09c76689-db27-4fa4-9fb2-4ac1d4d111fb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 📌 Section E: Next Steps (to be developed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ade0b16-52a6-4af4-a1ae-d0a505bf87a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# - Parse key properties into structured tables (e.g., using regex or ChatGPT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a7f6e50-1490-47ef-b911-278981636528",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# - Automatically download datasheets from distributor websites"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "740bdc6d-48e4-4c7f-b7e9-4bb0d86b653f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# - Search for compatible parts via web APIs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "adda4dda-8bed-423b-a9c2-87f988ffa391",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# - Export results to Excel or Markdown"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python (datasheet_env)",
|
||||
"language": "python",
|
||||
"name": "datasheet_env"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user