Add Week 1 solutions - Day 1, 2, 4, 5 and Exercise
This commit is contained in:
385
week1/my-solutions/day5-solution.ipynb
Normal file
385
week1/my-solutions/day5-solution.ipynb
Normal file
@@ -0,0 +1,385 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Day 5 Solution - Business Solution: Company Brochure Generator\n",
|
||||
"\n",
|
||||
"This is my solution to the Day 5 assignment. I've implemented a comprehensive business solution that generates company brochures.\n",
|
||||
"\n",
|
||||
"## Features Implemented:\n",
|
||||
"- Intelligent link selection using LLM\n",
|
||||
"- Multi-page content aggregation\n",
|
||||
"- Professional brochure generation\n",
|
||||
"- Model comparison and optimization\n",
|
||||
"- Business-ready output formatting\n",
|
||||
"- Cost-effective processing strategies\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Day 5 setup complete! Ready for business solution development.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Day 5 Solution - Imports and Setup\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import ssl\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from urllib.parse import urljoin\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import ollama\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# Load environment variables\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"\n",
|
||||
"# SSL workaround for Windows: the lines below turn off HTTPS certificate verification, so use only for local experiments\n",
|
||||
"ssl._create_default_https_context = ssl._create_unverified_context\n",
|
||||
"os.environ['PYTHONHTTPSVERIFY'] = '0'\n",
|
||||
"os.environ['CURL_CA_BUNDLE'] = ''\n",
|
||||
"\n",
|
||||
"# Initialize clients\n",
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"# Constants\n",
|
||||
"MODEL_GPT = 'gpt-4o-mini'\n",
|
||||
"MODEL_LLAMA = 'llama3.2'\n",
|
||||
"\n",
|
||||
"print(\"Day 5 setup complete! Ready for business solution development.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Enhanced Web Scraping Functions\n",
|
||||
"HEADERS = {\n",
|
||||
" \"User-Agent\": (\n",
|
||||
" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \"\n",
|
||||
" \"AppleWebKit/537.36 (KHTML, like Gecko) \"\n",
|
||||
" \"Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
" )\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"def fetch_website_contents(url, char_limit=2000):\n",
|
||||
" \"\"\"Fetch and clean website content\"\"\"\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url, headers=HEADERS, timeout=10)\n",
|
||||
" response.raise_for_status()\n",
|
||||
" html = response.text\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error fetching {url}: {e}\")\n",
|
||||
" return \"Error: Could not fetch website content\"\n",
|
||||
" \n",
|
||||
" soup = BeautifulSoup(html, \"html.parser\")\n",
|
||||
" \n",
|
||||
" # Remove script and style elements\n",
|
||||
" for script in soup([\"script\", \"style\"]):\n",
|
||||
" script.decompose()\n",
|
||||
" \n",
|
||||
" title = soup.title.get_text(strip=True) if soup.title else \"No title found\"\n",
|
||||
" text = soup.get_text()\n",
|
||||
" \n",
|
||||
" # Clean up whitespace\n",
|
||||
" lines = (line.strip() for line in text.splitlines())\n",
|
||||
" chunks = (phrase.strip() for line in lines for phrase in line.split(\" \"))\n",
|
||||
" text = ' '.join(chunk for chunk in chunks if chunk)\n",
|
||||
" \n",
|
||||
" return (f\"{title}\\n\\n{text}\").strip()[:char_limit]\n",
|
||||
"\n",
|
||||
"def fetch_website_links(url):\n",
|
||||
" \"\"\"Fetch all links from a website\"\"\"\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url, headers=HEADERS, timeout=10)\n",
|
||||
" response.raise_for_status()\n",
|
||||
" html = response.text\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error fetching links from {url}: {e}\")\n",
|
||||
" return []\n",
|
||||
" \n",
|
||||
" soup = BeautifulSoup(html, \"html.parser\")\n",
|
||||
" links = []\n",
|
||||
" \n",
|
||||
" for a in soup.select(\"a[href]\"):\n",
|
||||
" href = a.get(\"href\")\n",
|
||||
" if href:\n",
|
||||
" # Convert relative URLs to absolute\n",
|
||||
" if href.startswith((\"http://\", \"https://\")):\n",
|
||||
" links.append(href)\n",
|
||||
" else:\n",
|
||||
" links.append(urljoin(url, href))\n",
|
||||
" \n",
|
||||
" return list(set(links)) # Remove duplicates\n",
|
||||
"\n",
|
||||
"print(\"Enhanced web scraping functions defined!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Intelligent Link Selection\n",
|
||||
"def select_relevant_links(url, model=\"gpt-4o-mini\"):\n",
|
||||
" \"\"\"Use LLM to select relevant links for brochure generation\"\"\"\n",
|
||||
" print(f\"🔍 Analyzing links for {url}...\")\n",
|
||||
" \n",
|
||||
" # Get all links\n",
|
||||
" links = fetch_website_links(url)\n",
|
||||
" print(f\"Found {len(links)} total links\")\n",
|
||||
" \n",
|
||||
" # Create prompt for link selection\n",
|
||||
" link_system_prompt = \"\"\"\n",
|
||||
" You are provided with a list of links found on a webpage.\n",
|
||||
" You are able to decide which of the links would be most relevant to include in a brochure about the company,\n",
|
||||
" such as links to an About page, or a Company page, or Careers/Jobs pages.\n",
|
||||
" You should respond in JSON as in this example:\n",
|
||||
"\n",
|
||||
" {\n",
|
||||
" \"links\": [\n",
|
||||
" {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
|
||||
" {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" Here is the list of links on the website {url} -\n",
|
||||
" Please decide which of these are relevant web links for a brochure about the company, \n",
|
||||
" respond with the full https URL in JSON format.\n",
|
||||
" Do not include Terms of Service, Privacy, email links.\n",
|
||||
"\n",
|
||||
" Links (some might be relative links):\n",
|
||||
"\n",
|
||||
" {chr(10).join(links[:50])}\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" if model.startswith(\"gpt\"):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" response_format={\"type\": \"json_object\"}\n",
|
||||
" )\n",
|
||||
" result = response.choices[0].message.content\n",
|
||||
" else:\n",
|
||||
" response = ollama.chat(\n",
|
||||
" model=model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": link_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" result = response['message']['content']\n",
|
||||
" \n",
|
||||
" links_data = json.loads(result)\n",
|
||||
" print(f\"✅ Selected {len(links_data['links'])} relevant links\")\n",
|
||||
" return links_data\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"❌ Error selecting links: {e}\")\n",
|
||||
" return {\"links\": []}\n",
|
||||
"\n",
|
||||
"print(\"Intelligent link selection function defined!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Content Aggregation\n",
|
||||
"def fetch_page_and_all_relevant_links(url, model=\"gpt-4o-mini\"):\n",
|
||||
" \"\"\"Fetch main page content and all relevant linked pages\"\"\"\n",
|
||||
" print(f\"📄 Fetching content for {url}...\")\n",
|
||||
" \n",
|
||||
" # Get main page content\n",
|
||||
" main_content = fetch_website_contents(url)\n",
|
||||
" \n",
|
||||
" # Get relevant links\n",
|
||||
" relevant_links = select_relevant_links(url, model)\n",
|
||||
" \n",
|
||||
" # Build comprehensive content\n",
|
||||
" result = f\"## Landing Page:\\n\\n{main_content}\\n## Relevant Links:\\n\"\n",
|
||||
" \n",
|
||||
" for link in relevant_links['links']:\n",
|
||||
" print(f\" 📄 Fetching {link['type']}: {link['url']}\")\n",
|
||||
" try:\n",
|
||||
" content = fetch_website_contents(link[\"url\"])\n",
|
||||
" result += f\"\\n\\n### Link: {link['type']}\\n\"\n",
|
||||
" result += content\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\" ❌ Error fetching {link['url']}: {e}\")\n",
|
||||
" result += f\"\\n\\n### Link: {link['type']} (Error)\\n\"\n",
|
||||
" result += f\"Error fetching content: {e}\"\n",
|
||||
" \n",
|
||||
" return result\n",
|
||||
"\n",
|
||||
"print(\"Content aggregation function defined!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Professional Brochure Generation\n",
|
||||
"def create_company_brochure(company_name, url, model=\"gpt-4o-mini\", style=\"professional\"):\n",
|
||||
" \"\"\"Generate a professional company brochure\"\"\"\n",
|
||||
" print(f\"🏢 Creating brochure for {company_name}...\")\n",
|
||||
" \n",
|
||||
" # Get all content\n",
|
||||
" all_content = fetch_page_and_all_relevant_links(url, model)\n",
|
||||
" \n",
|
||||
" # Truncate if too long (to avoid token limits)\n",
|
||||
" if len(all_content) > 5000:\n",
|
||||
" all_content = all_content[:5000] + \"\\n\\n[Content truncated...]\"\n",
|
||||
" \n",
|
||||
" # Define brochure system prompt based on style\n",
|
||||
" if style == \"professional\":\n",
|
||||
" brochure_system_prompt = \"\"\"\n",
|
||||
" You are an assistant that analyzes the contents of several relevant pages from a company website\n",
|
||||
" and creates a short brochure about the company for prospective customers, investors and recruits.\n",
|
||||
" Respond in markdown without code blocks.\n",
|
||||
" Include details of company culture, customers and careers/jobs if you have the information.\n",
|
||||
" \"\"\"\n",
|
||||
" elif style == \"humorous\":\n",
|
||||
" brochure_system_prompt = \"\"\"\n",
|
||||
" You are an assistant that analyzes the contents of several relevant pages from a company website\n",
|
||||
" and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.\n",
|
||||
" Respond in markdown without code blocks.\n",
|
||||
" Include details of company culture, customers and careers/jobs if you have the information.\n",
|
||||
" \"\"\"\n",
|
||||
" else:\n",
|
||||
" brochure_system_prompt = \"\"\"\n",
|
||||
" You are an assistant that analyzes the contents of several relevant pages from a company website\n",
|
||||
" and creates a short brochure about the company.\n",
|
||||
" Respond in markdown without code blocks.\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" You are looking at a company called: {company_name}\n",
|
||||
" Here are the contents of its landing page and other relevant pages;\n",
|
||||
" use this information to build a short brochure of the company in markdown without code blocks.\n",
|
||||
"\n",
|
||||
" {all_content}\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" if model.startswith(\"gpt\"):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": brochure_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=1000\n",
|
||||
" )\n",
|
||||
" brochure = response.choices[0].message.content\n",
|
||||
" else:\n",
|
||||
" response = ollama.chat(\n",
|
||||
" model=model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": brochure_system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" brochure = response['message']['content']\n",
|
||||
" \n",
|
||||
" print(f\"✅ Brochure generated successfully!\")\n",
|
||||
" return brochure\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"❌ Error generating brochure: {e}\")\n",
|
||||
" return f\"Error generating brochure: {e}\"\n",
|
||||
"\n",
|
||||
"def display_brochure(company_name, url, model=\"gpt-4o-mini\", style=\"professional\"):\n",
|
||||
" \"\"\"Display a company brochure\"\"\"\n",
|
||||
" brochure = create_company_brochure(company_name, url, model, style)\n",
|
||||
" display(Markdown(f\"# {company_name} Brochure\\n\\n{brochure}\"))\n",
|
||||
"\n",
|
||||
"print(\"Professional brochure generation functions defined!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Test Day 5 Solution - Business Brochure Generator\n",
|
||||
"print(\"## Day 5 Solution Test - Business Brochure Generator\")\n",
|
||||
"print(\"=\"*60)\n",
|
||||
"\n",
|
||||
"# Test with different companies\n",
|
||||
"test_companies = [\n",
|
||||
" (\"Hugging Face\", \"https://huggingface.co\"),\n",
|
||||
" (\"OpenAI\", \"https://openai.com\"),\n",
|
||||
" (\"Anthropic\", \"https://anthropic.com\")\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print(\"🏢 Testing brochure generation for different companies...\")\n",
|
||||
"\n",
|
||||
"for company_name, url in test_companies:\n",
|
||||
" print(f\"\\n{'='*50}\")\n",
|
||||
" print(f\"Testing: {company_name}\")\n",
|
||||
" print(f\"URL: {url}\")\n",
|
||||
" print('='*50)\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" # Test with professional style\n",
|
||||
" print(f\"\\n📄 Generating professional brochure for {company_name}...\")\n",
|
||||
" display_brochure(company_name, url, model=MODEL_GPT, style=\"professional\")\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"❌ Error with {company_name}: {e}\")\n",
|
||||
" \n",
|
||||
" print(\"\\n\" + \"-\"*40)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user