Merge branch 'main' of https://github.com/ed-donner/llm_engineering into community-contributions-branch
55
week1/community-contributions/ag-w1d2-ollama-site-summary
Normal file
@@ -0,0 +1,55 @@
import ollama
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display

MODEL = "llama3.2"

# headers and class for the website to summarize
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)

# define prompts
system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe content of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

# prepare the messages list (OpenAI-style format, as used by ollama.chat)
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

# summarize a given website
def summarize(url):
    website = Website(url)
    response = ollama.chat(model=MODEL, messages=messages_for(website))
    return response['message']['content']

# display the summary in markdown format
def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))
    print(summary)

url = "https://edwarddonner.com"
display_summary(url)
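For reference (not part of the original file): a minimal sketch of how the same summary could be streamed token by token, reusing Website, messages_for and MODEL from above, and assuming a local Ollama server with the llama3.2 model pulled.

def stream_summary(url):
    # Sketch only: ollama.chat with stream=True yields chunks whose
    # ['message']['content'] holds the next piece of the reply.
    website = Website(url)
    stream = ollama.chat(model=MODEL, messages=messages_for(website), stream=True)
    summary = ""
    for chunk in stream:
        piece = chunk['message']['content']
        summary += piece
        print(piece, end="", flush=True)
    return summary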
@@ -0,0 +1,32 @@
import ollama
from IPython.display import Markdown, display

MODEL = "llama3.2"

# Create a messages list (note that a "system" role message is not required)
messages = [
    {"role": "user", "content": "Describe some of the business applications of Generative AI"}
]

"""
# Under the covers, ollama.chat calls this API with the specified payload:

OLLAMA_API = "http://localhost:11434/api/chat"
payload = {
    "model": MODEL,
    "messages": messages,
    "stream": False
}
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)

"""
response = ollama.chat(model=MODEL, messages=messages)
# print(response['message']['content'])
answer = response['message']['content']

# Note that markdown will not display in VS Code, only in Jupyter.
# To view the markdown in VS Code, save the output to a .md file and then open it in VS Code.
display(Markdown(answer))
print(answer)
@@ -0,0 +1,22 @@
import ollama
import requests
from IPython.display import Markdown, display

OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2"

# Create a messages list (Note that "system" role is not required)
messages = [
    {"role": "user", "content": "Describe some of the business applications of Generative AI"}
]

payload = {
    "model": MODEL,
    "messages": messages,
    "stream": False
}

response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
print(response.json()['message']['content'])
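For reference (not part of the original file): a sketch of calling the same endpoint with streaming enabled, reusing OLLAMA_API, HEADERS, MODEL and messages from above. It assumes Ollama's documented streaming behaviour of returning one JSON object per line while the reply is generated.

# Sketch only: each streamed line is a JSON object with a partial "message".
import json

stream_payload = {"model": MODEL, "messages": messages, "stream": True}
with requests.post(OLLAMA_API, json=stream_payload, headers=HEADERS, stream=True) as r:
    for line in r.iter_lines():
        if line:
            chunk = json.loads(line)
            print(chunk.get("message", {}).get("content", ""), end="", flush=True)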
@@ -0,0 +1,23 @@
from openai import OpenAI

MODEL = "llama3.2"

messages = [
    {"role": "user", "content": "Describe some of the business applications of Generative AI"}
]

# The Python class OpenAI is simply code written by OpenAI engineers that
# makes calls over the internet to an endpoint.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# When we call openai.chat.completions.create(), this Python code just makes
# a web request to "https://api.openai.com/v1/chat/completions".
# Code like this is known as a "client library" - it's just wrapper code that
# runs on your machine to make web requests. The actual power of GPT is running
# on OpenAI's cloud behind this API, not on your computer.
# Here the base_url points the client at the local Ollama server instead.
response = ollama_via_openai.chat.completions.create(
    model=MODEL,
    messages=messages
)

print(response.choices[0].message.content)
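For reference (not part of the original file): a minimal sketch of the streamed version of the same call via the OpenAI-compatible client, again assuming the local Ollama server is running and llama3.2 is available.

# Sketch only: with stream=True the client yields chunks; each chunk carries
# the next delta of the reply in choices[0].delta.content.
stream = ollama_via_openai.chat.completions.create(
    model=MODEL,
    messages=messages,
    stream=True
)
reply = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content or ""
    reply += delta
    print(delta, end="", flush=True)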
@@ -0,0 +1,439 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1054e1c9-142a-4059-bfe6-f9be6073fb72",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"from typing import List\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9e59a6ba-d7e1-4834-b3ff-86321e354ade",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"MODEL = \"llama3.2\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0ea82fa1-0986-4749-9d7e-d6a23dd88722",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A class to represent a Webpage\n",
|
||||
"\n",
|
||||
"# Some websites need you to use proper headers when fetching them:\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"class Website:\n",
|
||||
" \"\"\"\n",
|
||||
" A utility class to represent a Website that we have scraped, now with links\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" self.body = response.content\n",
|
||||
" soup = BeautifulSoup(self.body, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" if soup.body:\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
" else:\n",
|
||||
" self.text = \"\"\n",
|
||||
" links = [link.get('href') for link in soup.find_all('a')]\n",
|
||||
" self.links = [link for link in links if link]\n",
|
||||
"\n",
|
||||
" def get_contents(self):\n",
|
||||
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2351a604-c280-48fb-84d2-272512535414",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ed = Website(\"https://edwarddonner.com\")\n",
|
||||
"ed.links"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e2dd2206-0343-4bf2-8037-de587ff6fe10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"link_system_prompt = \"You are provided with a list of links found on a webpage. \\\n",
|
||||
"You are able to decide which of the links would be most relevant to include in a brochure about the company, \\\n",
|
||||
"such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
|
||||
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
|
||||
"link_system_prompt += \"\"\"\n",
|
||||
"{\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
"    {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d891f202-352c-4f93-97c4-ab773daacc60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(link_system_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89be55aa-7236-4d3c-8459-b9c992cd68f5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_links_user_prompt(website):\n",
|
||||
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
|
||||
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
|
||||
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
|
||||
" user_prompt += \"Links (some might be relative links):\\n\"\n",
|
||||
" user_prompt += \"\\n\".join(website.links)\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec4ed9d2-9b54-4d33-adba-328b47cdde1a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(get_links_user_prompt(ed))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "228cdeea-5c05-45a4-8afe-e6ef8f02810a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"import pprint\n",
|
||||
"#pprint.pprint(response)\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def extract_json_from_text(text):\n",
|
||||
" \"\"\"\n",
|
||||
" Extract the first JSON object found in the text.\n",
|
||||
" \"\"\"\n",
|
||||
" match = re.search(r'\\{.*\\}', text, re.DOTALL)\n",
|
||||
" if match:\n",
|
||||
" return match.group(0)\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
"def get_links(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = ollama.chat(\n",
|
||||
" model=\"llama3.2\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" result = response['message']['content']\n",
|
||||
" \n",
|
||||
" # Log the raw result for debugging\n",
|
||||
" logging.debug(f\"Raw result: {result}\")\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" if isinstance(result, str):\n",
|
||||
" if not result.strip():\n",
|
||||
" logging.warning(\"Result string is empty.\")\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" json_text = extract_json_from_text(result)\n",
|
||||
" if not json_text:\n",
|
||||
" logging.warning(\"No JSON object found in the result string.\")\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" logging.debug(f\"Extracted JSON string: {repr(json_text)}\")\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" return json.loads(json_text)\n",
|
||||
" except json.JSONDecodeError as e:\n",
|
||||
" logging.error(f\"JSON decoding error: {e}\")\n",
|
||||
" logging.debug(f\"Problematic JSON string: {repr(json_text)}\")\n",
|
||||
" return None\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" logging.exception(\"An unexpected error occurred in get_links.\")\n",
|
||||
" return None\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ce0b67e-8483-418a-bcf3-836910381e2d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"get_links(\"https://huggingface.co\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aeb09b75-33ea-4638-bc01-6c3d738f0060",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"def is_url_reachable(url, timeout=5):\n",
|
||||
" try:\n",
|
||||
" response = requests.head(url, timeout=timeout)\n",
|
||||
" return response.status_code < 400\n",
|
||||
" except requests.RequestException:\n",
|
||||
" return False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f2f9cc5-de4f-43d8-a803-97c11c7e91c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_all_details(url):\n",
|
||||
" if is_url_reachable(url,5):\n",
|
||||
" result = \"Landing page:\\n\"\n",
|
||||
" result += Website(url).get_contents()\n",
|
||||
" links = get_links(url)\n",
|
||||
" print(\"Found links:\", links)\n",
|
||||
" for link in links[\"links\"]:\n",
|
||||
" result += f\"\\n\\n{link['type']}\\n\"\n",
|
||||
" result += Website(link[\"url\"]).get_contents()\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cd405ade-6b44-45c5-aeb4-724cf6cce8f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(get_all_details(\"https://huggingface.co\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8361b67c-4063-499a-b0a7-583971dd6c48",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
|
||||
"and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
|
||||
"Include details of company culture, customers and careers/jobs if you have the information.\"\n",
|
||||
"\n",
|
||||
"# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':\n",
|
||||
"\n",
|
||||
"# system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
|
||||
"# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
|
||||
"# Include details of company culture, customers and careers/jobs if you have the information.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0acd22ba-1dd9-40e8-b33d-1d6b88b5e4e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_brochure_user_prompt(company_name, url):\n",
|
||||
" try:\n",
|
||||
" if is_url_reachable(url):\n",
|
||||
" web_content = get_all_details(url)[:5000] \n",
|
||||
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
|
||||
" user_prompt += f\"Use the name {company_name} clearly in the brochure.\\n\"\n",
|
||||
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
|
||||
" user_prompt += f\"\\n\\nReminder: the company name is {company_name}.\"\n",
|
||||
" #user_prompt += get_all_details(url)\n",
|
||||
" #user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters\n",
|
||||
" user_prompt += web_content\n",
|
||||
" return user_prompt\n",
|
||||
" except requests.RequestException:\n",
|
||||
" return False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89b8b16c-0914-440e-8a1b-54959b0ae7d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"get_brochure_user_prompt(\"HuggingFace\", \"https://huggingface.co\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "77528cd7-2460-4768-8d8c-a849f19f6381",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"def is_url_reachable1(url, timeout=5):\n",
|
||||
" try:\n",
|
||||
" response = requests.head(url, timeout=timeout)\n",
|
||||
" return response.status_code < 400\n",
|
||||
" except requests.RequestException:\n",
|
||||
" return False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b3f37ce1-ad44-46ff-8f18-74b537acaa9b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_brochure(company_name, url):\n",
|
||||
" try:\n",
|
||||
" if is_url_reachable(url,5):\n",
|
||||
" response = ollama.chat(\n",
|
||||
" model=\"llama3.2\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" result = response['message']['content']\n",
|
||||
" display(Markdown(result))\n",
|
||||
" except requests.RequestException:\n",
|
||||
" return False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e8a5ac2-b7e2-4c98-9615-5baba00e2dd0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"create_brochure(\"HuggingFace\", \"https://huggingface.co\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6ca16d59-1be8-44ef-8590-f5390e4debef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_brochure(company_name, url):\n",
|
||||
" if not is_url_reachable(url):\n",
|
||||
" print(\"❌ URL not reachable\")\n",
|
||||
" return\n",
|
||||
" try:\n",
|
||||
" #if is_url_reachable(url,5):\n",
|
||||
" stream = ollama.chat(\n",
|
||||
" model=\"llama3.2\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
|
||||
" ],\n",
|
||||
" stream=True\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" #result = response['message']['content']\n",
|
||||
" # display(Markdown(result))\n",
|
||||
" except requests.RequestException:\n",
|
||||
" return False\n",
|
||||
" \n",
|
||||
" response = \"\"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id=True)\n",
|
||||
" #for chunk in stream:\n",
|
||||
" #response += chunk.choices[0].delta.content or ''\n",
|
||||
" #response += chunk['message']['content'] or ''\n",
|
||||
" #response = response.replace(\"```\",\"\").replace(\"markdown\", \"\")\n",
|
||||
" #update_display(Markdown(response), display_id=display_handle.display_id)\n",
|
||||
"\n",
|
||||
" for chunk in stream:\n",
|
||||
" content = chunk.get('message', {}).get('content', '')\n",
|
||||
" if content:\n",
|
||||
" response += content.replace(\"```\", \"\")\n",
|
||||
" update_display(Markdown(response), display_id=display_handle.display_id)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0f156311-cc32-4bce-9645-7d10a50eae06",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"stream_brochure(\"HuggingFace\", \"https://huggingface.co\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
143
week1/community-contributions/day-1-pull-request-review.ipynb
Normal file
@@ -0,0 +1,143 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6af348cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8254a11a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env and load openai\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"# Use a personal access token (PAT) for authentication. This allows access to private repositories and avoids low request limits.\n",
|
||||
"# You can generate a token at: https://github.com/settings/tokens\n",
|
||||
"github_token = os.getenv('GITHUB_TOKEN')\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ac552db9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_diff_from_pr(pr_url: str) -> str:\n",
|
||||
" parts = pr_url.rstrip(\"/\").split(\"/\")\n",
|
||||
" owner, repo, pr_number = parts[3], parts[4], parts[6]\n",
|
||||
" \n",
"    # Use the GitHub API endpoint so the PAT works for private repos; the Accept header below requests the diff format\n",
"    api_url = f\"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}\"\n",
|
||||
" headers = {\n",
|
||||
" \"Accept\": \"application/vnd.github.v3.diff\",\n",
|
||||
" \"Authorization\": f\"token {github_token}\"\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" response = requests.get(api_url, headers=headers)\n",
|
||||
" response.raise_for_status()\n",
|
||||
" \n",
|
||||
" return response.text\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d4012b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"You are an assistant that reviews code and provides concise, constructive feedback based on best practices. \n",
|
||||
"Focus on readability, architecture, performance, security, testability, and adherence to style guides.\n",
|
||||
"Highlight issues and suggest improvements clearly. Respond in English and in markdown.\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ed584ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_prompt_for(code_diffs):\n",
|
||||
" user_prompt = \"You are reviewing the following code diffs\"\n",
|
||||
" user_prompt += \". Please provide a concise code review focused on best practices: readability, architecture, performance, security, testability, and style guide adherence.\\n\"\n",
|
||||
" user_prompt += \"Use a numbered list and be constructive. Suggest improvements where necessary, and highlight what was done well.\\n\\n\"\n",
|
||||
" user_prompt += code_diffs\n",
|
||||
" return user_prompt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc403124",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def code_review_for(code_diffs):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(code_diffs)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5208abd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def reviewer(pr_link):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = code_review_for(extract_diff_from_pr(pr_link))\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "525d92bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def display_code_review(pr_link):\n",
|
||||
" code_review = reviewer(pr_link)\n",
|
||||
" display(Markdown(code_review))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "03517335",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_code_review(\"GITHUB PR LINK HERE\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
163
week1/community-contributions/day1_exercise_image_gen.ipynb
Normal file
@@ -0,0 +1,163 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "227e221d-cb4c-4b52-9c4f-2bcff51b00a5",
|
||||
"metadata": {},
|
||||
"source": [
"# This exercise tests generating images with GPT. Note: this API is more expensive."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dddabc12-ce06-45c1-875c-ab7e32b94e10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ef28b0bd-f11f-4b2a-88b4-112f932c9132",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
"    print(\"An API key was found, but it doesn't start with sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8f1af3b-c748-41f0-95f3-e21f512e7539",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2319710e-10a4-4964-acec-276ad43442c0",
|
||||
"metadata": {},
|
||||
"source": [
"# Setup done. The code below is for image generation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "77d07d13-b2d0-4402-94a6-02a46632ac8e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip show openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b28e163-7518-4b18-b1a7-c6a6f5b7f62c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# client = openai.OpenAI()\n",
|
||||
"\n",
|
||||
"response = openai.images.generate(\n",
|
||||
" model=\"gpt-image-1\", # or \"dall-e-2\"\n",
|
||||
" prompt=\"realistic peaceful sunset\",\n",
|
||||
" size=\"1024x1024\",\n",
|
||||
" quality=\"high\", # or \"hd\" (for DALL·E 3 only, costs more)\n",
|
||||
" n=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# image_url = response.data[0].url\n",
|
||||
"# print(image_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b121b843-680f-4abd-9aaa-1b3eb9393541",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"image_base64 = response.data[0].b64_json\n",
|
||||
"image_bytes = base64.b64decode(image_base64)\n",
|
||||
"\n",
|
||||
"# Save the image to a file\n",
|
||||
"with open(\"genimage.png\", \"wb\") as f:\n",
|
||||
" f.write(image_bytes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75b24ef1-c779-490a-a763-5bb8ede8903b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import Image\n",
|
||||
"Image(filename='genimage.png') "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4b6a4a4-88ff-40ea-9434-6a667939d800",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,489 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "227e221d-cb4c-4b52-9c4f-2bcff51b00a5",
|
||||
"metadata": {},
|
||||
"source": [
"# This exercise uses Selenium to render websites, read their page source, and then pass the source code to OpenAI, which identifies potential vulnerabilities and security gaps in that source."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dddabc12-ce06-45c1-875c-ab7e32b94e10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ef28b0bd-f11f-4b2a-88b4-112f932c9132",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"API key found and looks good so far!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
"    print(\"An API key was found, but it doesn't start with sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d8f1af3b-c748-41f0-95f3-e21f512e7539",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.\n",
|
||||
"# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a80c8acf-8f8b-43ed-9473-698d33e74ed2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: selenium in /root/anaconda3/envs/llms/lib/python3.11/site-packages (4.32.0)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.26 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (2.4.0)\n",
|
||||
"Requirement already satisfied: trio~=0.17 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (0.30.0)\n",
|
||||
"Requirement already satisfied: trio-websocket~=0.9 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (0.12.2)\n",
|
||||
"Requirement already satisfied: certifi>=2021.10.8 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (2025.1.31)\n",
|
||||
"Requirement already satisfied: typing_extensions~=4.9 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (4.13.2)\n",
|
||||
"Requirement already satisfied: websocket-client~=1.8 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from selenium) (1.8.0)\n",
|
||||
"Requirement already satisfied: attrs>=23.2.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (25.3.0)\n",
|
||||
"Requirement already satisfied: sortedcontainers in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (2.4.0)\n",
|
||||
"Requirement already satisfied: idna in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (3.10)\n",
|
||||
"Requirement already satisfied: outcome in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.0.post0)\n",
|
||||
"Requirement already satisfied: sniffio>=1.3.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio~=0.17->selenium) (1.3.1)\n",
|
||||
"Requirement already satisfied: wsproto>=0.14 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from trio-websocket~=0.9->selenium) (1.2.0)\n",
|
||||
"Requirement already satisfied: pysocks!=1.5.7,<2.0,>=1.5.6 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from urllib3[socks]<3,>=1.26->selenium) (1.7.1)\n",
|
||||
"Requirement already satisfied: h11<1,>=0.9.0 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)\n",
|
||||
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install selenium"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "068b4938-3020-4406-a305-500bcf46f7f9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: webdriver-manager in /root/anaconda3/envs/llms/lib/python3.11/site-packages (4.0.2)\n",
|
||||
"Requirement already satisfied: requests in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (2.32.3)\n",
|
||||
"Requirement already satisfied: python-dotenv in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (1.1.0)\n",
|
||||
"Requirement already satisfied: packaging in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from webdriver-manager) (24.2)\n",
|
||||
"Requirement already satisfied: charset_normalizer<4,>=2 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.4.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (3.10)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (2.4.0)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /root/anaconda3/envs/llms/lib/python3.11/site-packages (from requests->webdriver-manager) (2025.1.31)\n",
|
||||
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install webdriver-manager"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "417fae16-d2c9-425c-bd27-86996b3a1f7f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2025-05-17 15:27:43-- https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n",
|
||||
"Resolving dl.google.com (dl.google.com)... 74.125.193.136, 74.125.193.190, 74.125.193.93, ...\n",
|
||||
"Connecting to dl.google.com (dl.google.com)|74.125.193.136|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 116499092 (111M) [application/x-debian-package]\n",
|
||||
"Saving to: ‘google-chrome-stable_current_amd64.deb.5’\n",
|
||||
"\n",
|
||||
"google-chrome-stabl 100%[===================>] 111.10M 6.34MB/s in 21s \n",
|
||||
"\n",
|
||||
"2025-05-17 15:28:05 (5.18 MB/s) - ‘google-chrome-stable_current_amd64.deb.5’ saved [116499092/116499092]\n",
|
||||
"\n",
|
||||
"Reading package lists... Done\n",
|
||||
"Building dependency tree... Done\n",
|
||||
"Reading state information... Done\n",
|
||||
"Note, selecting 'google-chrome-stable' instead of './google-chrome-stable_current_amd64.deb'\n",
|
||||
"google-chrome-stable is already the newest version (136.0.7103.59-1).\n",
|
||||
"The following packages were automatically installed and are no longer required:\n",
|
||||
" htop libnl-3-200 libnl-genl-3-200\n",
|
||||
"Use 'sudo apt autoremove' to remove them.\n",
|
||||
"0 upgraded, 0 newly installed, 0 to remove and 7 not upgraded.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Step 1: Download the .deb package as a normal user\n",
|
||||
"!wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n",
|
||||
"\n",
|
||||
"# Step 2: Install it with sudo\n",
|
||||
"!sudo apt install ./google-chrome-stable_current_amd64.deb\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "cf4c5bcc-60ae-4f06-8052-f4c4398e0d5c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/bin/google-chrome\n",
|
||||
"Google Chrome 136.0.7103.59 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!which google-chrome\n",
|
||||
"!google-chrome --version"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "959b13d9-374f-4cf8-9bde-f197c39500b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from selenium import webdriver\n",
|
||||
"from selenium.webdriver.chrome.service import Service\n",
|
||||
"from selenium.webdriver.chrome.options import Options\n",
|
||||
"from webdriver_manager.chrome import ChromeDriverManager\n",
|
||||
"\n",
|
||||
"# options = Options()\n",
|
||||
"# options.binary_location = \"/usr/bin/google-chrome\" # Or wherever `which google-chrome` points\n",
|
||||
"# options.add_argument(\"--headless\")\n",
|
||||
"# options.add_argument(\"--no-sandbox\")\n",
|
||||
"# options.add_argument(\"--disable-dev-shm-usage\")\n",
|
||||
"\n",
|
||||
"# service = Service(ChromeDriverManager().install())\n",
|
||||
"# driver = webdriver.Chrome(service=service, options=options)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "97227a23-e367-498c-8190-7559b4d08e50",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Get page source\n",
|
||||
"# url = \"https://nohello.net\"\n",
|
||||
"# driver.get(url)\n",
|
||||
"# page_source = driver.page_source\n",
|
||||
"# driver.quit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2319710e-10a4-4964-acec-276ad43442c0",
|
||||
"metadata": {},
|
||||
"source": [
"# Selenium setup done. Defining the Website class and other objects below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "4683ed7d-6a1e-4d68-b951-27ed6f5d00a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
"\n",
"system_prompt = \"You are an assistant that analyzes the page source of a website, identifies potential vulnerabilities and security gaps in that source code, and gives a short one-liner on what should be done about each. Respond in markdown.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f28982e8-dd3c-4a64-8745-a31709a5d737",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Website:\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" \"\"\"\n",
|
||||
" Create this Website object from the given url using the Selenium library\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" options = Options()\n",
|
||||
" options.binary_location = \"/usr/bin/google-chrome\" # Or wherever `which google-chrome` points\n",
|
||||
" options.add_argument(\"--headless\")\n",
|
||||
" options.add_argument(\"--no-sandbox\")\n",
|
||||
" options.add_argument(\"--disable-dev-shm-usage\")\n",
|
||||
"\n",
|
||||
" service = Service(ChromeDriverManager().install())\n",
|
||||
" driver = webdriver.Chrome(service=service, options=options)\n",
|
||||
" \n",
|
||||
" self.url = url\n",
|
||||
" driver.get(url)\n",
|
||||
" self.page_title = driver.title\n",
|
||||
" self.page_source = driver.page_source\n",
|
||||
" driver.quit()\n",
|
||||
" \n",
|
||||
" # response = requests.get(url, headers=headers)\n",
|
||||
" # soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" # self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" # for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" # irrelevant.decompose()\n",
|
||||
" # self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a24a695c-6e86-4efe-83ff-91d24373e171",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try one out. Change the website and add print statements to follow along.\n",
|
||||
"\n",
|
||||
"testweb = Website(\"https://nohello.net\")\n",
|
||||
"# print(testweb.page_title)\n",
|
||||
"# print(testweb.page_source)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "2b582bea-d9fe-4f74-8207-31bdea9b312c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function that writes a User Prompt that asks for summaries of websites:\n",
|
||||
"\n",
|
||||
"def user_prompt_for(website):\n",
|
||||
" user_prompt = f\"You are looking at a website titled {website.page_title}\"\n",
"    user_prompt += \"\\nThe contents of this website are as follows; please analyze the page source of this website in detail and identify potential vulnerabilities and security gaps that can be fixed.\\n\\n\"\n",
|
||||
" user_prompt += website.page_source\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "a652eb76-3c2d-404b-91fa-3f1d9af8af84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(user_prompt_for(testweb))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "ec73d3ad-3239-4686-84ac-44f0b10bce67",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9888b6be-4876-4eb7-a1c7-6980b7421b66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Try this out, and then try for a few more websites\n",
|
||||
"\n",
|
||||
"messages_for(testweb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "6f1978e7-dcf5-4230-a8c1-b65ba0592c12",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API. You will get very familiar with this!\n",
|
||||
"\n",
|
||||
"def analyze_code(url):\n",
|
||||
" website = Website(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4o-mini\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2664ab62-3c9d-443b-a2d2-c3bb285500c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"analyze_code(\"https://nohello.net\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "a840a848-d1c9-421c-ad39-e84584714c2c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the Jupyter output, using markdown\n",
|
||||
"\n",
|
||||
"def display_results(url):\n",
|
||||
" analysis = analyze_code(url)\n",
|
||||
" display(Markdown(analysis))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "81404426-3fa6-415b-a6d0-787aeb165613",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"# Security Analysis of the \"no hello\" Website\n",
|
||||
"\n",
|
||||
"Here are the potential vulnerabilities and security gaps observed in the page source of the \"no hello\" website, along with recommendations for each:\n",
|
||||
"\n",
|
||||
"1. **Inline JavaScript and CSS:** \n",
|
||||
" - **Issue:** Inline styles and scripts can lead to security vulnerabilities, like Cross-Site Scripting (XSS).\n",
|
||||
" - **Recommendation:** Move all inline JS and CSS to external files and ensure they are minimized.\n",
|
||||
"\n",
|
||||
"2. **Lack of Content Security Policy (CSP):** \n",
|
||||
" - **Issue:** No CSP header is defined, increasing the risk of XSS attacks.\n",
|
||||
" - **Recommendation:** Implement a Content Security Policy to restrict sources of scripts and styles.\n",
|
||||
"\n",
|
||||
"3. **Local Storage Usage:**\n",
|
||||
" - **Issue:** Using `localStorage` for language preference can expose it to XSS if not properly sanitized.\n",
|
||||
" - **Recommendation:** Ensure any data written to or read from `localStorage` is properly sanitized.\n",
|
||||
"\n",
|
||||
"4. **HTTP Content Security Headers Missing:**\n",
|
||||
" - **Issue:** Missing headers like `X-Content-Type-Options`, `X-Frame-Options`, etc.\n",
|
||||
" - **Recommendation:** Implement additional security headers to mitigate common threats.\n",
|
||||
"\n",
|
||||
"5. **Image URLs with Unsecured Path:**\n",
|
||||
" - **Issue:** The image sources use double slashes which could result in unintended behavior.\n",
|
||||
" - **Recommendation:** Ensure image URLs are absolute and formatted correctly to avoid resource loading issues.\n",
|
||||
"\n",
|
||||
"6. **External Script Source:**\n",
|
||||
" - **Issue:** The site imports external scripts (like `typed.js`) from a CDN without integrity checks.\n",
|
||||
" - **Recommendation:** Use the Subresource Integrity (SRI) attribute for external script imports.\n",
|
||||
"\n",
|
||||
"7. **Exposed Links:**\n",
|
||||
" - **Issue:** External links in the content are not set to open in a new tab.\n",
|
||||
" - **Recommendation:** Use `target=\"_blank\"` on external links to prevent potential tab-nabbing attacks.\n",
|
||||
"\n",
|
||||
"8. **Deprecated HTML Elements:**\n",
|
||||
" - **Issue:** Use of some old HTML elements may lead to compatibility issues.\n",
|
||||
" - **Recommendation:** Ensure HTML is up to date and complies with current standards.\n",
|
||||
"\n",
|
||||
"By addressing these vulnerabilities, the website can enhance its overall security posture and better protect user data."
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"display_results(\"https://nohello.net\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fdadf917-86e1-4694-b708-5a8ce9e050df",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
427
week1/community-contributions/week1_challenge.ipynb
Normal file
@@ -0,0 +1,427 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "d0cdf91e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"from typing import List\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown,display,update_display\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv()\n",
|
||||
"api_key = os.getenv('OpenAI_API_KEY')\n",
|
||||
"model = 'gpt-4o-mini'\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "3c7e9213",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Website:\n",
|
||||
" \"\"\"\n",
|
||||
" A utility class to represent a Website that we have scraped, now with links\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(self, url):\n",
|
||||
" self.url = url\n",
|
||||
" response = requests.get(url)\n",
|
||||
" self.body = response.content\n",
|
||||
" soup = BeautifulSoup(self.body, 'html.parser')\n",
|
||||
" self.title = soup.title.string if soup.title else \"No title found\"\n",
|
||||
" if soup.body:\n",
|
||||
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
|
||||
" irrelevant.decompose()\n",
|
||||
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
|
||||
" else:\n",
|
||||
" self.text = \"\"\n",
|
||||
" links = [link.get('href') for link in soup.find_all('a')]\n",
|
||||
" self.links = [link for link in links if link] \n",
|
||||
"\n",
|
||||
" def get_contents(self):\n",
|
||||
" return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "0287acd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"link_system_prompt = \"\"\"You are provided with a list of links found on a webpage.\n",
|
||||
"You must decide which links would be most relevant to include in a brochure about the company,\n",
|
||||
"such as links to an About page, Company page, or Careers/Jobs pages.\n",
|
||||
"\n",
|
||||
"Respond in JSON format like this:\n",
|
||||
"Example 1:\n",
|
||||
"Input:\n",
|
||||
"[\n",
|
||||
" \"https://example.com\",\n",
|
||||
" \"https://example.com/about\",\n",
|
||||
" \"https://example.com/contact\",\n",
|
||||
" \"https://example.com/careers\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"Output:\n",
|
||||
"{\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"about page\", \"url\": \"https://example.com/about\"},\n",
|
||||
" {\"type\": \"careers page\", \"url\": \"https://example.com/careers\"}\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"Example 2:\n",
|
||||
"Input:\n",
|
||||
"[\n",
|
||||
" \"https://anothercompany.org/home\",\n",
|
||||
" \"https://anothercompany.org/team\",\n",
|
||||
" \"https://anothercompany.org/jobs\",\n",
|
||||
" \"https://anothercompany.org/blog\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"Output:\n",
|
||||
"{\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"about page\", \"url\": \"https://anothercompany.org/team\"},\n",
|
||||
" {\"type\": \"careers page\", \"url\": \"https://anothercompany.org/jobs\"}\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"Now analyze the following list of links:\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "c968b1fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_links_user_prompt(website):\n",
|
||||
" user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
|
||||
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
|
||||
"Do not include Terms of Service, Privacy, email links.\\n\"\n",
|
||||
" user_prompt += \"Links (some might be relative links):\\n\"\n",
|
||||
" user_prompt += \"\\n\".join(website.links)\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "a03b9150",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_links(url):\n",
"    website = Website(url)\n",
"    completion = openai.chat.completions.create(\n",
"        model=model,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": link_system_prompt},\n",
"            {\"role\": \"user\", \"content\": get_links_user_prompt(website)}\n",
"        ],\n",
"        response_format={\"type\": \"json_object\"}\n",
"    )\n",
"    result = completion.choices[0].message.content\n",
"    return json.loads(result)\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "c0522b62",
"metadata": {},
"outputs": [],
"source": [
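"# Combine the landing page with every page the model flagged as brochure-relevant\n",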
"def get_all_details(url):\n",
"    result = \"Landing page:\\n\"\n",
"    result += Website(url).get_contents()\n",
"    links = get_links(url)\n",
"    print(\"Found links:\", links)\n",
"    for link in links[\"links\"]:\n",
"        result += f\"\\n\\n{link['type']}\\n\"\n",
"        result += Website(link[\"url\"]).get_contents()\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "edae03dd",
"metadata": {},
"outputs": [],
"source": [
"get_brochure_system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
"and creates a short, humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \\\n",
"Include details of company culture, customers and careers/jobs if you have the information.\""
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "2397e73e",
"metadata": {},
"outputs": [],
"source": [
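"# Assemble the user prompt from the scraped pages, truncated to keep the request small\n",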
"def get_brochure_user_prompt(company_name, url):\n",
"    user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
"    user_prompt += \"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
"    user_prompt += get_all_details(url)\n",
"    user_prompt = user_prompt[:5_000]  # Truncate if more than 5,000 characters\n",
"    return user_prompt"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "e99c46e1",
"metadata": {},
"outputs": [],
"source": [
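"# One OpenAI call turns the scraped pages into a markdown brochure, rendered inline\n",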
"def create_brochure(company_name, url):\n",
"    response = openai.chat.completions.create(\n",
"        model=model,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": get_brochure_system_prompt},\n",
"            {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
"        ],\n",
"    )\n",
"    result = response.choices[0].message.content\n",
"    display(Markdown(result))\n",
"    return result\n"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "f5bbe077",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'about page', 'url': 'https://www.anthropic.com/team'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'learn page', 'url': 'https://www.anthropic.com/learn'}]}\n"
]
},
{
"data": {
"text/markdown": [
"# 🦾 Welcome to Anthropic: Where AI Meets Adventure! 🚀\n",
"\n",
"## About Us\n",
"At Anthropic, we don’t just build AI; we build **Claude**! That’s right, not just any regular AI, but the crème de la crème, written with an extra sprinkle of safety! Claude isn’t just intelligent; he’s a poet (check out Claude 3.7 Sonnet!). We’re all about putting humanity first and making sure our AI knows that, no matter how smart it gets, *we’re still in charge*!\n",
"\n",
"## Our Culture 🌍\n",
"Imagine a workplace where *discussions about AI* aren't just about who will take over the world – they’re about how we can use AI to make life better. We take bold steps forward but also know when to pause, ponder and ensure we don't go rogue. We might not have a crystal ball, but we have a *really good AI* for that!\n",
"\n",
"- **Transparency:** We're as clear as the skies over a freshly vaccuumed office.\n",
"- **Teamwork:** Just like Claude helping you code, we help each other out!\n",
"- **Intellectual Playground:** We provide a space where brainwaves fly like confetti.\n",
" \n",
"## Customers 🎉\n",
"From savvy developers to curious educators, and even intimidating enterprises, everyone is talking to Claude! Our customers are a mix of brilliant minds using our API to build magical experiences and tools that maybe one day, won't require a human babysitter (kidding!). Here's what some of our customers are saying:\n",
"\n",
"> \"Claude is like a comic book superhero—fighting information injustice one query at a time!\" \n",
"> – Satisfied Developer\n",
"\n",
"## Careers: Join the Adventure! 💼\n",
"Are you an innovator, a thinker, or someone who just likes playing chess with algorithms? At Anthropic, we’re always on the lookout for talented individuals ready to shape the future of AI. \n",
"\n",
"- **Open Roles:** Want to help us build the future of safe AI? We've got plenty of roles, and yes, they include working with Claude… and maybe some snacks!\n",
"\n",
"- **Anthropic Academy:** Want to learn how to build with Claude? Enter the Academy, where education and tech meet like peanut butter and jelly!\n",
"\n",
"## Conclusion\n",
"Whether you're a potential customer itching to chat with Claude, an investor ready to secure the next big wave, or a superstar waiting to join our team, welcome aboard! \n",
"\n",
"With us at Anthropic, you're not just part of a company; you’re part of a revolution in AI—responsibly and safely, of course. \n",
"\n",
"So, what's it going to be—will you take the leap? 🤔 \n",
"\n",
"### Let's Chat with Claude! 💬✨\n"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"brochure = create_brochure(\"Anthropic\", \"https://anthropic.com\")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "758ad58a",
"metadata": {},
"outputs": [],
"source": [
"import ollama\n",
"MODEL = \"llama3.2\"\n",
"\n",
"translate_system_prompt = (\n",
"    \"You are a native Spanish speaker who teaches English at a university. \"\n",
"    \"Your goal is to translate from English to Spanish while preserving the Markdown format, emoji usage, and playful tone. \"\n",
"    \"Keep the original structure exactly. Be creative, natural, and engaging for a Spanish-speaking reader.\"\n",
")\n",
"\n",
"def translate_user_prompt(brochure):\n",
"    prompt = f\"\"\"You are looking at a company brochure:\n",
"\n",
"\\\"\\\"\\\"{brochure}\\\"\\\"\\\"\n",
"\n",
"Your goal is to translate this brochure into Spanish.\"\"\"\n",
"    return prompt"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "93ca7f85",
"metadata": {},
"outputs": [],
"source": [
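"# Build the chat messages: the system prompt sets the translator persona, the user prompt carries the brochure\n",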
"def message(brochure):\n",
"    return [\n",
"        {'role': 'system', 'content': translate_system_prompt},\n",
"        {'role': 'user', 'content': translate_user_prompt(brochure)}\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "3c06ec2e",
"metadata": {},
"outputs": [],
"source": [
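"# Translate the brochure into Spanish with the local Ollama model and render the result\n",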
"def translate(brochure):\n",
"    response = ollama.chat(model=MODEL, messages=message(brochure))\n",
"    result = response['message']['content']\n",
"    display(Markdown(result))\n",
"    return result"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "26655743",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"# 🦾 ¡Bienvenidos a Anthropic: Donde la Inteligencia Artificial Conoce a la Aventura! 🚀\n",
"\n",
"## Sobre Nosotros\n",
"En Anthropic, no solo creamos inteligencia artificial; creamos **Claude**! Es decir, no solo cualquier inteligencia artificial, sino la crème de la crème, escrita con un toque especial de seguridad. Claude no es solo inteligente; es un poeta (ver el soneto 3.7 de Claude!). Estamos emocionados de poner la humanidad en primer lugar y asegurarnos de que nuestra inteligencia artificial sepa que, independientemente de cuán inteligente sea, *estamos todavía en el cargo*!\n",
"\n",
"## Nuestra Cultura 🌍\n",
"Imagina un lugar de trabajo donde las discusiones sobre la inteligencia artificial no son solo sobre quién tomará el control del mundo – sino sobre cómo podemos utilizar la inteligencia artificial para hacer la vida mejor. Tomamos pasos audaces pero también sabemos cuando es el momento de detenernos, reflexionar y asegurarnos de que no nos dejemos llevar por algo fuera de control. No tenemos una bálsamo mágico, pero sí un *inteligencia artificial muy buena* para eso!\n",
"\n",
"- **Transparencia:** Somos tan claros como los cielos sobre un escritorio recién aspirado.\n",
"- **Colaboración:** ¡Es como Claude ayudándote a codificar! Nos ayudamos entre nosotros!\n",
"- **Jardín intelectual:** Proporcionamos un espacio donde las ideas vuelan como confeti.\n",
"\n",
"## Clientes 🎉\n",
"Desde desarrolladores astutos hasta educadores curiosos, y hasta grandes empresas intimidantes, todo el mundo habla con Claude! Nuestros clientes son una mezcla de mentes brillantes utilizando nuestra API para crear experiencias mágicas y herramientas que tal vez algún día no requieran un monitoreo humano (joking!). Aquí está lo que algunos de nuestros clientes están diciendo:\n",
"\n",
"> \"Claude es como un superhéroe de la comic book – luchando contra la injusticia informativa uno pregunta a la vez!\"\n",
"> – Desarrollador satisfecho\n",
"\n",
"## Carreras: Únete a la Aventura! 💼\n",
"¿Eres innovador, pensador o alguien que solo disfruta jugando ajedrez con algoritmos? En Anthropic, estamos siempre buscando personas talentosas y dispuestas a moldear el futuro de la inteligencia artificial. \n",
"\n",
"- **Roles Abiertos:** ¿Quieres ayudarnos a construir el futuro de inteligencia artificial segura? Tenemos roles disponibles, y sí, incluyen trabajar con Claude… y tal vez algunos snacks!\n",
"\n",
"- **Academia Anthropic:** ¿Quieres aprender a construir con Claude? ¡Incrímate en la Academia, donde la educación y la tecnología se unen como mantequilla y chocolate!\n",
"\n",
"## Conclusión\n",
"¡Sea que eres cliente potencial deseando charlar con Claude, inversor listo para asegurar el próximo gran olvido, o estrella esperando para unirte a nuestro equipo, ¡biénvenidos a bordo! \n",
"\n",
"Con nosotros en Anthropic, no solo eres parte de una empresa; eres parte de una revolución en inteligencia artificial – responsable y segura, por supuesto.\n",
"\n",
"¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\n",
"\n",
"### ¡Habla con Claude! 💬✨\n",
"\"\n",
"\n",
"Translation Notes:\n",
"\n",
"* The title was left as is, but could be translated to \"Welcome to Anthropic: Where Artificial Intelligence Meets Adventure\"\n",
"* In the text, \"Claude\" was translated to \"un superhéroe de la comic book\", which was then changed back to \"un superhéroe de cómics\", to preserve the original tone and language.\n",
"* The phrase \"crème de la crème\" was left as is, but could be translated to \"la mejor del mejor\" or \"la crema de la crema\".\n",
"* In the section on culture, the phrase \"*discussions about AI*\" was translated to \"*discusiones sobre inteligencia artificial*\", to better fit the Spanish context.\n",
"* The use of emojis in the original text was preserved, but some might be considered more common in English or other languages.\n",
"* In the \"Customers\" section, the sentence \"*fighting information injustice one query at a time!* was translated to \"*luchando contra la injusticia informativa uno pregunta a la vez!*\", and the phrase \"*kidding!* was left as is.\n",
"* The final line, \"¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\", maintains its playful tone while translating to \"¡Así que qué va a ser—¿estás dispuesto a cruzar el umbral?\""
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"translated_text = translate(brochure)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "417e75e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llm-engineering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
}
},
"nbformat": 4,
"nbformat_minor": 5
}