enhanced structure and comments for week 1 and added a Spanish version

This commit is contained in:
Simon Dufty
2024-09-30 19:03:32 +10:00
parent bdd3ef77e0
commit 08f8d6bdc6
4 changed files with 656 additions and 260 deletions

View File

@@ -1,163 +0,0 @@
# imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
# Initialize and constants
# Load environment configuration (python-dotenv; presumably reads a local .env file).
load_dotenv()
# Re-export the API key into the process environment. If the variable is not
# set, the placeholder string below is stored instead.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
# Model name passed to every chat-completion request in this script.
MODEL = 'gpt-4o-mini'
# Shared client, constructed with no explicit arguments
# (presumably it picks the key up from the environment set above -- confirm).
openai = OpenAI()
# A class to represent a Webpage
class Website:
    """A fetched web page reduced to its title, visible text and links.

    Attributes are filled in by ``__init__``, which performs the HTTP request.
    """

    url: str
    title: str
    body: bytes  # raw payload taken from ``response.content``
    text: str  # visible body text, one fragment per line
    links: List[str]

    def __init__(self, url, timeout=30):
        """Download ``url`` and parse it.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds handed to ``requests.get``; without a timeout the
                request may block forever on an unresponsive server.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``.string`` can be None/empty even when a <title> tag exists
        # (e.g. an empty tag), so fall back in that case as well.
        title_tag = soup.title
        if title_tag and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually carry a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as a prompt fragment."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
# System prompt for the link-classification request made in get_links().
# It asks the model for a JSON object with two lists: "categorized_links"
# and "brochure_links" (entries keyed by "type" and "url"); get_all_details()
# reads the latter.
# NOTE(review): the "[INSERT LINK LIST HERE]" placeholder is never substituted
# in the code visible here -- the actual links are sent in the user message
# built by get_links_user_prompt(). Confirm whether the placeholder is intended.
link_system_prompt = """
You are provided with a list of links found on a webpage. Your task is to first categorize each link into one of the following categories:
- about page
- careers page
- terms of service
- privacy policy
- contact page
- other (please specify).
Once the links are categorized, please choose which links are most relevant to include in a brochure about the company.
The brochure should only include links such as About pages, Careers pages, or Company Overview pages. Exclude any links related to Terms of Service, Privacy Policy, or email addresses.
Respond in the following JSON format:
{
"categorized_links": [
{"category": "about page", "url": "https://full.url/about"},
{"category": "careers page", "url": "https://full.url/careers"},
{"category": "terms of service", "url": "https://full.url/terms"},
{"category": "privacy policy", "url": "https://full.url/privacy"},
{"category": "other", "specify": "contact page", "url": "https://full.url/contact"}
],
"brochure_links": [
{"type": "about page", "url": "https://full.url/about"},
{"type": "careers page", "url": "https://full.url/careers"}
]
}
Please find the links below and proceed with the task:
Links (some may be relative links):
[INSERT LINK LIST HERE]
"""
def get_links_user_prompt(website):
    """Build the user message that lists a page's links for classification.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    request = (
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return intro + request + heading + link_lines
def get_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Fetches the page, sends its links to the chat model with a JSON-object
    response format, and returns the decoded reply.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply parsed with ``json.loads``.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    reply = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    raw_json = reply.choices[0].message.content
    return json.loads(raw_json)
from urllib.parse import urljoin
def get_all_details(url):
    """Collect the landing page text plus the text of each brochure link.

    The brochure links come from ``get_links``; because that data is produced
    by a language model, entries that are not dicts or that lack a usable
    "url" are skipped instead of raising, and a missing "type" falls back to
    the label "other".

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: the landing page contents followed by a labelled section
        for every brochure link that could be resolved.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]

    links = get_links(url)
    # ``or []`` also covers an explicit null for "brochure_links".
    brochure_links = links.get('brochure_links', []) or []
    print("Found Brochure links:", brochure_links)  # Debug output to show the brochure links

    for link in brochure_links:
        if not isinstance(link, dict):
            continue
        target = link.get("url")
        if not target:
            continue
        sections.append(f"\n\n{link.get('type', 'other')}:\n")
        # Relative links are resolved against the landing page address.
        full_url = urljoin(url, target)
        sections.append(Website(full_url).get_contents())

    return "".join(sections)
# System prompt for the brochure-writing request.
# Written as adjacent string literals with explicit separators: with
# backslash line continuations the sentences and section names were fused
# together (e.g. "markdown.Include", "About UsWhat we do").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown.\n"
    "Include details of company culture, customers and careers/jobs if you have the information.\n"
    "Structure the brochure to include specific sections as follows:\n"
    "- About Us\n"
    "- What we do\n"
    "- How We Do It\n"
    "- Where We Do It\n"
    "- Our People\n"
    "- Our Culture\n"
    "- Connect with Us\n"
    "Please provide two versions of the brochure, the first in English, the second in Spanish. "
    "The contents of the brochure are to be the same for both languages."
)
def get_brochure_user_prompt(company_name, url):
    """Build the user message for the brochure request.

    Args:
        company_name: Name of the company shown to the model.
        url: Landing page whose contents (and relevant sub-pages) are included.

    Returns:
        The prompt text, cut to at most 20,000 characters.
    """
    heading = f"You are looking at a company called: {company_name}\n"
    instructions = "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    full_prompt = heading + instructions + get_all_details(url)
    # Truncate if more than 20,000 characters
    return full_prompt[:20_000]
def _strip_code_fences(text):
    """Remove Markdown code-fence markers (``` and ```markdown) from ``text``."""
    # Strip the tagged fence first so that only the fence's language tag is
    # removed; the plain word "markdown" in the brochure text is left alone.
    return text.replace("```markdown", "").replace("```", "")


def stream_brochure(company_name, url):
    """Stream a brochure for a company into a live-updating Markdown display.

    Sends the brochure prompts to the chat model with streaming enabled and
    re-renders the accumulated text after every chunk.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page used to gather the brochure's source material.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )
    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A chunk's delta content may be None; treat that as an empty piece.
        raw += chunk.choices[0].delta.content or ''
        # Clean the full accumulated text each time so a fence marker that
        # arrives split across two chunks is still removed once complete.
        update_display(Markdown(_strip_code_fences(raw)), display_id=display_handle.display_id)
# Example run: stream a brochure for Anthropic.
# NOTE(review): this is a module-level call, so it executes whenever the file is run or imported.
stream_brochure("Anthropic", "https://anthropic.com")

View File

@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "0a572211-5fe3-4dd5-9870-849cfb75901f",
"metadata": {},
"outputs": [],
@@ -238,7 +238,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "cc4965cf-f704-4d40-8b7d-f8e50913f87c",
"metadata": {},
"outputs": [
@@ -246,66 +246,76 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Found Brochure links: [{'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}, {'type': 'other', 'specify': 'outsourcing', 'url': 'https://edwarddonner.com/outsmart/'}]\n"
"Found Brochure links: [{'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}, {'type': 'other', 'specify': 'Outsmart page', 'url': 'https://edwarddonner.com/outsmart/'}]\n"
]
},
{
"data": {
"text/markdown": [
"\n",
"# Edward Donner Company Brochure\n",
"# Edward Donner Brochure\n",
"\n",
"## About Us\n",
"Edward Donner is the creative brain behind Nebula.io, where we leverage Generative AI and advanced machine learning technologies to help recruiters effectively source, understand, engage, and manage talent. Born from a rich history in the AI landscape, our goal is simple yet profound: to aid individuals in discovering their true potential and pursuing their ikigai — their reason for being.\n",
"At Edward Donner, we are committed to revolutionizing the way people connect with career opportunities. Founded by Ed, the co-founder and CTO of Nebula.io, we leverage cutting-edge Generative AI and machine learning to assist recruiters in sourcing, understanding, engaging, and managing talent more effectively than ever before.\n",
"\n",
"## What We Do\n",
"At Edward Donner, we specialize in an array of tools and services, primarily focusing on a patented matching model that connects people with roles they are optimally suited for — all without the need for keyword searches. Our platform is designed to ensure you find your dream job while having a fulfilling and engaging work experience.\n",
"Weve developed a patented matching model that connects candidates with their ideal roles—no keywords necessary. With our innovative approach, we aim to help individuals discover their potential and pursue their passions, leading to higher levels of human prosperity.\n",
"\n",
"## How We Do It\n",
"We employ groundbreaking, proprietary Large Language Models (LLMs) that are finely tuned to the recruitment industry. Our innovative approach is geared towards real-world application, minimizing the gap between candidates and their ideal roles. By focusing on individual strengths and needs, we drive efficiency and happiness in job placements.\n",
"Our award-winning platform uses advanced AI technology, honing in on the unique skills and potentials of jobseekers. We are inspired by the concept of “Ikigai,” which drives our mission to match people with roles that fulfill their career aspirations.\n",
"\n",
"## Where We Do It\n",
"Our operations orbit around the vibrant backdrop of New York City, an epicenter for talent and innovation. We create an inclusive remote work environment that thrives on collaboration, creativity, and technology, ensuring that our team and our customers can engage seamlessly, wherever they are.\n",
"Our operations are primarily based in New York City, where we embrace an environment that fosters creativity, innovation, and collaborative spirit. While we are grounded in NYC, our reach extends globally as we work with clients and users from around the world.\n",
"\n",
"## Our People\n",
"Our diverse team consists of experts in software engineering, data science, and technology leadership. Our founder, Ed, brings extensive experience and a love for programming, music, and enthusiastic problem-solving. Each individual contributes unique skills while sharing a passion for harnessing AI to tackle meaningful challenges.\n",
"At Edward Donner, we believe our greatest asset is our talented team. We are composed of dedicated professionals who are experts in software engineering, data science, and technology leadership, all with a shared passion for harnessing AI to solve real-world problems. Our diverse backgrounds contribute to a culture of inclusion and excellence.\n",
"\n",
"## Our Culture\n",
"At Edward Donner, we pride ourselves on fostering a culture of innovation and collaboration. We aim to create a workspace that inspires creativity, encourages continuous learning, and celebrates the successes of our employees. Our mission to elevate human potential extends to our work culture, where every voice and idea is valued.\n",
"We pride ourselves on cultivating a workplace that thrives on collaboration, openness, and continuous learning. Our work culture emphasizes innovation while also recognizing the importance of personal connections and networking. We encourage our team and connect with others not just virtually, but over coffee when possible!\n",
"\n",
"## Connect with Us\n",
"We would love to hear from you! To stay connected and explore opportunities, reach out via:\n",
"- Email: ed [at] edwarddonner [dot] com\n",
"- [Our Website](http://www.edwarddonner.com)\n",
"- Follow us on social media: [LinkedIn](#), [Twitter](#), [Facebook](#)\n",
"Interested in learning more about what we do? Wed love to hear from you! Whether youre a potential customer, investor, or recruit, lets connect!\n",
"\n",
"- **Email:** ed@edwarddonner.com\n",
"- **Website:** [www.edwarddonner.com](http://www.edwarddonner.com)\n",
"- **Follow Us:**\n",
" - [LinkedIn](https://www.linkedin.com)\n",
" - [Twitter](https://www.twitter.com)\n",
" - [Facebook](https://www.facebook.com)\n",
"- **Subscribe to Our Newsletter!**\n",
" \n",
"---\n",
"\n",
"# Folleto de la Empresa Edward Donner\n",
"# Folleto de Edward Donner\n",
"\n",
"## Sobre Nosotros\n",
"Edward Donner es la mente creativa detrás de Nebula.io, donde aprovechamos la IA generativa y tecnologías avanzadas de aprendizaje automático para ayudar a los reclutadores a identificar, comprender, comprometer y gestionar talentos. Nacido de una rica historia en el ámbito de IA, nuestro objetivo es simple pero profundo: ayudar a las personas a descubrir su verdadero potencial y perseguir su ikigai, su razón de ser.\n",
"## Acerca de Nosotros\n",
"En Edward Donner, estamos comprometidos a revolucionar la forma en que las personas se conectan con oportunidades de carrera. Fundado por Ed, el cofundador y CTO de Nebula.io, aprovechamos la inteligencia artificial generativa y el aprendizaje automático de vanguardia para ayudar a los reclutadores a buscar, comprender, involucrar y gestionar talento de manera más eficaz que nunca.\n",
"\n",
"## Lo Que Hacemos\n",
"En Edward Donner, nos especializamos en una variedad de herramientas y servicios, centrados principalmente en un modelo de coincidencia patentado que conecta a las personas con los roles para los que están óptimamente calificadas, todo esto sin necesidad de búsquedas por palabras clave. Nuestra plataforma está diseñada para garantizar que encuentres tu trabajo soñado mientras vives una experiencia laboral satisfactoria y atractiva.\n",
"## Qué Hacemos\n",
"Hemos desarrollado un modelo de emparejamiento patentado que conecta a los candidatos con sus roles ideales, sin necesidad de palabras clave. Con nuestro enfoque innovador, buscamos ayudar a las personas a descubrir su potencial y seguir sus pasiones, llevando a niveles más altos de prosperidad humana.\n",
"\n",
"## Cómo Lo Hacemos\n",
"Empleamos modelos de lenguaje de gran tamaño (LLMs) patentados y orientados específicamente a la industria del reclutamiento. Nuestro enfoque innovador está dirigido a la aplicación del mundo real, minimizando la brecha entre los candidatos y sus roles ideales. Al centrarnos en las fortalezas y necesidades individuales, impulsamos la eficiencia y la felicidad en las colocaciones laborales.\n",
"Nuestra plataforma galardonada utiliza tecnología avanzada de inteligencia artificial, centrándose en las habilidades y los potenciales únicos de los buscadores de empleo. Nos inspira el concepto de \"Ikigai\", que impulsa nuestra misión de emparejar a las personas con roles que cumplen sus aspiraciones profesionales.\n",
"\n",
"## Dónde Lo Hacemos\n",
"Nuestras operaciones giran en torno al vibrante telón de fondo de la ciudad de Nueva York, un epicentro de talento e innovación. Creamos un entorno de trabajo remoto inclusivo que prospera en la colaboración, la creatividad y la tecnología, asegurando que nuestro equipo y nuestros clientes puedan interactuar de manera fluida, donde sea que se encuentren.\n",
"Nuestras operaciones se basan principalmente en la ciudad de Nueva York, donde abrazamos un entorno que fomenta la creatividad, la innovación y el espíritu colaborativo. Si bien estamos enraizados en Nueva York, nuestro alcance se extiende globalmente mientras trabajamos con clientes y usuarios de todo el mundo.\n",
"\n",
"## Nuestra Gente\n",
"Nuestro diverso equipo está compuesto por expertos en ingeniería de software, ciencia de datos y liderazgo tecnológico. Nuestro fundador, Ed, aporta una amplia experiencia y un amor por la programación, la música y la resolución entusiasta de problemas. Cada individuo contribuye con habilidades únicas mientras comparte la pasión por aprovechar la IA para abordar desafíos significativos.\n",
"## Nuestro Personal\n",
"En Edward Donner, creemos que nuestro mayor activo es nuestro talentoso equipo. Estamos compuestos por profesionales dedicados que son expertos en ingeniería de software, ciencia de datos y liderazgo tecnológico, todos con una pasión compartida por aprovechar la inteligencia artificial para resolver problemas del mundo real. Nuestros diversos antecedentes contribuyen a una cultura de inclusión y excelencia.\n",
"\n",
"## Nuestra Cultura\n",
"En Edward Donner, nos enorgullece fomentar una cultura de innovación y colaboración. Nuestro objetivo es crear un espacio de trabajo que inspire la creatividad, fomente el aprendizaje continuo y celebre los éxitos de nuestros empleados. Nuestra misión de elevar el potencial humano se extiende a nuestra cultura laboral, donde cada voz e idea es valorada.\n",
"Nos enorgullecemos de cultivar un lugar de trabajo que prospera en colaboración, apertura y aprendizaje continuo. Nuestra cultura laboral enfatiza la innovación, mientras que también reconoce la importancia de las conexiones personales y el networking. Fomentamos a nuestro equipo y conectamos con otros no solo de forma virtual, ¡sino también tomando un café cuando sea posible!\n",
"\n",
"## Conéctate Con Nosotros\n",
"¡Nos encantaría saber de ti! Para mantener la conexión y explorar oportunidades, contáctanos a través de:\n",
"- Email: ed [at] edwarddonner [dot] com\n",
"- [Nuestro Sitio Web](http://www.edwarddonner.com)\n",
"- Síguenos en redes sociales: [LinkedIn](#), [Twitter](#), [Facebook](#)\n",
"## Conéctate con Nosotros\n",
"¿Interesado en aprender más sobre lo que hacemos? ¡Nos encantaría saber de ti! Ya seas un cliente potencial, inversionista o recluta, ¡conectémonos!\n",
"\n",
"- **Correo Electrónico:** ed@edwarddonner.com\n",
"- **Sitio Web:** [www.edwarddonner.com](http://www.edwarddonner.com)\n",
"- **Síguenos:**\n",
" - [LinkedIn](https://www.linkedin.com)\n",
" - [Twitter](https://www.twitter.com)\n",
" - [Facebook](https://www.facebook.com)\n",
"- **¡Suscríbete a Nuestro Boletín!**\n",
"\n"
],
"text/plain": [