add solutions for week1
This commit is contained in:
@@ -0,0 +1,279 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e5da3f5-ebd0-4e20-ab89-95847187287b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"from typing import List\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from IPython.display import Markdown, display, update_display, clear_output\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import os\n",
|
||||
"from scraper import fetch_website_links, fetch_website_contents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86adec56-3b27-46da-9b1a-1e5946a76a09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENROUTER_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n",
|
||||
"headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"openrouter_url = \"https://openrouter.ai/api/v1\"\n",
|
||||
"openai = OpenAI(api_key=api_key, base_url=openrouter_url)\n",
|
||||
"MODEL = \"gpt-5-nano\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abf2f706-2709-404a-9fb7-774a9f57dd11",
|
||||
"metadata": {
|
||||
"editable": true,
|
||||
"slideshow": {
|
||||
"slide_type": ""
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"company_name = input(\"Enter the company name: \")\n",
|
||||
"url = input(\"Enter the company url: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "153fa3d1-3ce5-46d0-838d-3e95a4b8628b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"link_system_prompt = \"You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.\\n\"\n",
|
||||
"link_system_prompt += \"You should respond in JSON as in this example:\"\n",
|
||||
"link_system_prompt += \"\"\"\n",
|
||||
" EXAMPLE 1:\n",
|
||||
" {\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
|
||||
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" EXAMPLE 2:\n",
|
||||
" {\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"company blog\", \"url\": \"https://blog.example.com\"},\n",
|
||||
" {\"type\": \"our story\", \"url\": \"https://example.com/our-story\"}\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" \"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fcacc2e-7445-4d8a-aa80-489d3a2247ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_links_user_prompt(url):\n",
|
||||
" user_prompt = f\"Here is the list of links on the website of {url} - \"\n",
|
||||
" user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\\n\"\n",
|
||||
" user_prompt += \"Links (some might be relative links):\\n\"\n",
|
||||
" links = fetch_website_links(url)\n",
|
||||
" user_prompt += \"\\n\".join(links[:20])\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfe222c5-0d3e-4be2-85e1-596ab9d407dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_links(url):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = MODEL,\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": get_links_user_prompt(url)}\n",
|
||||
" ],\n",
|
||||
" response_format = {\"type\": \"json_object\"}\n",
|
||||
" )\n",
|
||||
" result = response.choices[0].message.content\n",
|
||||
" return json.loads(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c964bdce-be5d-41c7-a8d7-8e25e58463c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_all_details(url):\n",
|
||||
" result = \"Landing page:\\n\"\n",
|
||||
" result += fetch_website_contents(url)\n",
|
||||
" links = get_links(url)\n",
|
||||
"\n",
|
||||
" for link in links[\"links\"]:\n",
|
||||
" result += f\"{link['type']}\\n\"\n",
|
||||
" try:\n",
|
||||
" result += f\"\\n\\n### Link: Link: {link['type']}\\n\"\n",
|
||||
" result += fetch_website_contents(link[\"url\"])\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Omitted link: {link['url']}: {e}\")\n",
|
||||
" continue\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5337019a-b789-49d7-bf10-0f15148c0276",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = (\n",
|
||||
" \"You are an assistant that analyzes the contents of several relevant pages from a company website \"\n",
|
||||
" \"and creates a great type of brochure about the company for prospective customers, investors, and recruits. \"\n",
|
||||
" \"Respond in markdown. Include details of company culture, customers, and careers/jobs if you have the information. Add emoticons where ever possible.\\n\\n\"\n",
|
||||
"\n",
|
||||
" \"Please structure the brochure using the following sections:\\n\"\n",
|
||||
" \"1. **Introduction**: A brief overview of the company.\\n\"\n",
|
||||
" \"2. **Company Culture**: Emphasize fun, atmosphere, and any unique cultural elements.\\n\"\n",
|
||||
" \"3. **Customers**: Mention notable customers or industries.\\n\"\n",
|
||||
" \"4. **Careers/Jobs**: Highlight career opportunities.\\n\"\n",
|
||||
" \"5. **Conclusion**: Wrap up with a final lighthearted message.\\n\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1dd4f2d4-8189-452a-b15a-c09ae5894ac8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_brochure_user_prompt(company_name, url):\n",
|
||||
" user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
|
||||
" user_prompt += f\"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
|
||||
" user_prompt += get_all_details(url)\n",
|
||||
" user_prompt = user_prompt[:20000]\n",
|
||||
" return user_prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8ab4bfef-eb22-43fb-8a46-f1f6a225793b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def stream_brochure():\n",
|
||||
" global brochure_text\n",
|
||||
" brochure_text = \"\"\n",
|
||||
"\n",
|
||||
" stream = openai.chat.completions.create(\n",
|
||||
" model = MODEL,\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)}\n",
|
||||
" ],\n",
|
||||
" stream = True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" response = \"\"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id = True)\n",
|
||||
" for chunk in stream:\n",
|
||||
" content = chunk.choices[0].delta.content or ''\n",
|
||||
" response += content\n",
|
||||
" brochure_text += content\n",
|
||||
" response = response.replace(\"```\", \"\"). replace(\"markdown\", \"\")\n",
|
||||
" update_display(Markdown(response), display_id = display_handle.display_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7828c747-7872-48e2-b3e6-faab95ba76cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def user_translate_brochure(language):\n",
|
||||
" clear_output(wait = True)\n",
|
||||
"\n",
|
||||
" translation_stream = openai.chat.completions.create(\n",
|
||||
" model = MODEL,\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": f\"Translate the following to {language}:\\n {brochure_text}\"}\n",
|
||||
" ],\n",
|
||||
" stream = True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" display_handle = display(Markdown(\"\"), display_id = True)\n",
|
||||
" translated_text = \"\"\n",
|
||||
"\n",
|
||||
" for chunk in translation_stream:\n",
|
||||
" content = chunk.choices[0].delta.content or \"\"\n",
|
||||
" if content:\n",
|
||||
" translated_text += content\n",
|
||||
" update_display(Markdown(translated_text), display_id = display_handle.display_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e6cfa92a-8a86-485d-a7e1-1651705ee6dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"stream_brochure()\n",
|
||||
"language_choice = input(\"Enter the language to translate the brochure into (e.g., 'French'): \")\n",
|
||||
"user_translate_brochure(language_choice)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user