{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Day 5 Solution - Business Solution: Company Brochure Generator\n",
    "\n",
    "This is my solution to the Day 5 assignment. I've implemented a comprehensive business solution that generates company brochures.\n",
    "\n",
    "## Features Implemented:\n",
    "- Intelligent link selection using LLM\n",
    "- Multi-page content aggregation\n",
    "- Professional brochure generation\n",
    "- Model comparison and optimization\n",
    "- Business-ready output formatting\n",
    "- Cost-effective processing strategies\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Day 5 setup complete! Ready for business solution development.\n"
     ]
    }
   ],
   "source": [
    "# Day 5 Solution - Imports and Setup\n",
    "import os\n",
    "import json\n",
    "import ssl\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from urllib.parse import urljoin\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI\n",
    "from dotenv import load_dotenv\n",
    "import ollama\n",
    "import time\n",
    "\n",
    "# Load environment variables\n",
    "load_dotenv(override=True)\n",
    "\n",
    "# SSL workaround for Windows / corporate proxies.\n",
    "# WARNING(security): this disables TLS certificate verification for the\n",
    "# whole process — only use on trusted networks.\n",
    "# NOTE: PYTHONHTTPSVERIFY is only read at interpreter startup, so setting\n",
    "# it here does not affect this process; it is kept for child processes.\n",
    "ssl._create_default_https_context = ssl._create_unverified_context\n",
    "os.environ['PYTHONHTTPSVERIFY'] = '0'\n",
    "os.environ['CURL_CA_BUNDLE'] = ''\n",
    "\n",
    "# Initialize clients\n",
    "openai = OpenAI()\n",
    "\n",
    "# Constants\n",
    "MODEL_GPT = 'gpt-4o-mini'\n",
    "MODEL_LLAMA = 'llama3.2'\n",
    "\n",
    "print(\"Day 5 setup complete! Ready for business solution development.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enhanced Web Scraping Functions\n",
    "HEADERS = {\n",
    "    \"User-Agent\": (\n",
    "        \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \"\n",
    "        \"AppleWebKit/537.36 (KHTML, like Gecko) \"\n",
    "        \"Chrome/117.0.0.0 Safari/537.36\"\n",
    "    )\n",
    "}\n",
    "\n",
    "def fetch_website_contents(url, char_limit=2000):\n",
    "    \"\"\"Fetch a page and return its title plus cleaned visible text.\n",
    "\n",
    "    Returns an error string (not an exception) if the fetch fails, so\n",
    "    callers can embed the message in aggregated content. Output is\n",
    "    truncated to char_limit characters.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        response = requests.get(url, headers=HEADERS, timeout=10)\n",
    "        response.raise_for_status()\n",
    "        html = response.text\n",
    "    except Exception as e:\n",
    "        print(f\"Error fetching {url}: {e}\")\n",
    "        return \"Error: Could not fetch website content\"\n",
    "\n",
    "    soup = BeautifulSoup(html, \"html.parser\")\n",
    "\n",
    "    # Remove script and style elements\n",
    "    for script in soup([\"script\", \"style\"]):\n",
    "        script.decompose()\n",
    "\n",
    "    title = soup.title.get_text(strip=True) if soup.title else \"No title found\"\n",
    "    text = soup.get_text()\n",
    "\n",
    "    # Collapse runs of whitespace into single spaces\n",
    "    lines = (line.strip() for line in text.splitlines())\n",
    "    chunks = (phrase.strip() for line in lines for phrase in line.split(\" \"))\n",
    "    text = ' '.join(chunk for chunk in chunks if chunk)\n",
    "\n",
    "    # Join title and body with a blank line (was a literal backslash-n)\n",
    "    return (f\"{title}\\n\\n{text}\").strip()[:char_limit]\n",
    "\n",
    "def fetch_website_links(url):\n",
    "    \"\"\"Return a de-duplicated list of absolute links found on the page.\n",
    "\n",
    "    Relative hrefs are resolved against `url`; returns [] on fetch errors.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        response = requests.get(url, headers=HEADERS, timeout=10)\n",
    "        response.raise_for_status()\n",
    "        html = response.text\n",
    "    except Exception as e:\n",
    "        print(f\"Error fetching links from {url}: {e}\")\n",
    "        return []\n",
    "\n",
    "    soup = BeautifulSoup(html, \"html.parser\")\n",
    "    links = []\n",
    "\n",
    "    for a in soup.select(\"a[href]\"):\n",
    "        href = a.get(\"href\")\n",
    "        if href:\n",
    "            # Convert relative URLs to absolute\n",
    "            if href.startswith((\"http://\", \"https://\")):\n",
    "                links.append(href)\n",
    "            else:\n",
    "                links.append(urljoin(url, href))\n",
    "\n",
    "    return list(set(links))  # Remove duplicates\n",
    "\n",
    "print(\"Enhanced web scraping functions defined!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Intelligent Link Selection\n",
    "def select_relevant_links(url, model=\"gpt-4o-mini\"):\n",
    "    \"\"\"Use an LLM to pick which links belong in a company brochure.\n",
    "\n",
    "    Returns a dict shaped like {\"links\": [{\"type\": ..., \"url\": ...}, ...]};\n",
    "    falls back to {\"links\": []} on any error (fetch, API, or JSON parse).\n",
    "    \"\"\"\n",
    "    print(f\"🔍 Analyzing links for {url}...\")\n",
    "\n",
    "    # Get all links\n",
    "    links = fetch_website_links(url)\n",
    "    print(f\"Found {len(links)} total links\")\n",
    "\n",
    "    # Create prompt for link selection\n",
    "    link_system_prompt = \"\"\"\n",
    "    You are provided with a list of links found on a webpage.\n",
    "    You are able to decide which of the links would be most relevant to include in a brochure about the company,\n",
    "    such as links to an About page, or a Company page, or Careers/Jobs pages.\n",
    "    You should respond in JSON as in this example:\n",
    "\n",
    "    {\n",
    "        \"links\": [\n",
    "            {\"type\": \"about page\", \"url\": \"https://full.url/goes/here/about\"},\n",
    "            {\"type\": \"careers page\", \"url\": \"https://another.full.url/careers\"}\n",
    "        ]\n",
    "    }\n",
    "    \"\"\"\n",
    "\n",
    "    # Limit to the first 50 links to avoid token limits (note: this comment\n",
    "    # was previously embedded inside the prompt string itself)\n",
    "    user_prompt = f\"\"\"\n",
    "    Here is the list of links on the website {url} -\n",
    "    Please decide which of these are relevant web links for a brochure about the company, \n",
    "    respond with the full https URL in JSON format.\n",
    "    Do not include Terms of Service, Privacy, email links.\n",
    "\n",
    "    Links (some might be relative links):\n",
    "\n",
    "    {chr(10).join(links[:50])}\n",
    "    \"\"\"\n",
    "\n",
    "    try:\n",
    "        if model.startswith(\"gpt\"):\n",
    "            response = openai.chat.completions.create(\n",
    "                model=model,\n",
    "                messages=[\n",
    "                    {\"role\": \"system\", \"content\": link_system_prompt},\n",
    "                    {\"role\": \"user\", \"content\": user_prompt}\n",
    "                ],\n",
    "                response_format={\"type\": \"json_object\"}\n",
    "            )\n",
    "            result = response.choices[0].message.content\n",
    "        else:\n",
    "            # format=\"json\" mirrors the OpenAI branch's response_format so\n",
    "            # the json.loads below doesn't choke on free-form prose\n",
    "            response = ollama.chat(\n",
    "                model=model,\n",
    "                messages=[\n",
    "                    {\"role\": \"system\", \"content\": link_system_prompt},\n",
    "                    {\"role\": \"user\", \"content\": user_prompt}\n",
    "                ],\n",
    "                format=\"json\"\n",
    "            )\n",
    "            result = response['message']['content']\n",
    "\n",
    "        links_data = json.loads(result)\n",
    "        print(f\"✅ Selected {len(links_data['links'])} relevant links\")\n",
    "        return links_data\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error selecting links: {e}\")\n",
    "        return {\"links\": []}\n",
    "\n",
    "print(\"Intelligent link selection function defined!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Content Aggregation\n",
    "def fetch_page_and_all_relevant_links(url, model=\"gpt-4o-mini\"):\n",
    "    \"\"\"Fetch the landing page plus every LLM-selected relevant page.\n",
    "\n",
    "    Returns one markdown string: the landing-page text followed by a\n",
    "    section per relevant link. Per-link fetch errors are recorded inline\n",
    "    rather than aborting the aggregation.\n",
    "    \"\"\"\n",
    "    print(f\"📄 Fetching content for {url}...\")\n",
    "\n",
    "    # Get main page content\n",
    "    main_content = fetch_website_contents(url)\n",
    "\n",
    "    # Get relevant links\n",
    "    relevant_links = select_relevant_links(url, model)\n",
    "\n",
    "    # Build comprehensive content (newlines were literal backslash-n before)\n",
    "    result = f\"## Landing Page:\\n\\n{main_content}\\n## Relevant Links:\\n\"\n",
    "\n",
    "    for link in relevant_links['links']:\n",
    "        print(f\"  📄 Fetching {link['type']}: {link['url']}\")\n",
    "        try:\n",
    "            content = fetch_website_contents(link[\"url\"])\n",
    "            result += f\"\\n\\n### Link: {link['type']}\\n\"\n",
    "            result += content\n",
    "        except Exception as e:\n",
    "            print(f\"  ❌ Error fetching {link['url']}: {e}\")\n",
    "            result += f\"\\n\\n### Link: {link['type']} (Error)\\n\"\n",
    "            result += f\"Error fetching content: {e}\"\n",
    "\n",
    "    return result\n",
    "\n",
    "print(\"Content aggregation function defined!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Professional Brochure Generation\n",
    "def create_company_brochure(company_name, url, model=\"gpt-4o-mini\", style=\"professional\"):\n",
    "    \"\"\"Generate a company brochure in markdown.\n",
    "\n",
    "    style is one of 'professional', 'humorous', or anything else for a\n",
    "    plain default. Returns the brochure text, or an error string if the\n",
    "    model call fails.\n",
    "    \"\"\"\n",
    "    print(f\"🢠Creating brochure for {company_name}...\")\n",
    "\n",
    "    # Get all content\n",
    "    all_content = fetch_page_and_all_relevant_links(url, model)\n",
    "\n",
    "    # Truncate if too long (to avoid token limits)\n",
    "    if len(all_content) > 5000:\n",
    "        all_content = all_content[:5000] + \"\\n\\n[Content truncated...]\"\n",
    "\n",
    "    # Define brochure system prompt based on style\n",
    "    if style == \"professional\":\n",
    "        brochure_system_prompt = \"\"\"\n",
    "        You are an assistant that analyzes the contents of several relevant pages from a company website\n",
    "        and creates a short brochure about the company for prospective customers, investors and recruits.\n",
    "        Respond in markdown without code blocks.\n",
    "        Include details of company culture, customers and careers/jobs if you have the information.\n",
    "        \"\"\"\n",
    "    elif style == \"humorous\":\n",
    "        brochure_system_prompt = \"\"\"\n",
    "        You are an assistant that analyzes the contents of several relevant pages from a company website\n",
    "        and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.\n",
    "        Respond in markdown without code blocks.\n",
    "        Include details of company culture, customers and careers/jobs if you have the information.\n",
    "        \"\"\"\n",
    "    else:\n",
    "        brochure_system_prompt = \"\"\"\n",
    "        You are an assistant that analyzes the contents of several relevant pages from a company website\n",
    "        and creates a short brochure about the company.\n",
    "        Respond in markdown without code blocks.\n",
    "        \"\"\"\n",
    "\n",
    "    user_prompt = f\"\"\"\n",
    "    You are looking at a company called: {company_name}\n",
    "    Here are the contents of its landing page and other relevant pages;\n",
    "    use this information to build a short brochure of the company in markdown without code blocks.\n",
    "\n",
    "    {all_content}\n",
    "    \"\"\"\n",
    "\n",
    "    try:\n",
    "        if model.startswith(\"gpt\"):\n",
    "            response = openai.chat.completions.create(\n",
    "                model=model,\n",
    "                messages=[\n",
    "                    {\"role\": \"system\", \"content\": brochure_system_prompt},\n",
    "                    {\"role\": \"user\", \"content\": user_prompt}\n",
    "                ],\n",
    "                temperature=0.7,\n",
    "                max_tokens=1000\n",
    "            )\n",
    "            brochure = response.choices[0].message.content\n",
    "        else:\n",
    "            response = ollama.chat(\n",
    "                model=model,\n",
    "                messages=[\n",
    "                    {\"role\": \"system\", \"content\": brochure_system_prompt},\n",
    "                    {\"role\": \"user\", \"content\": user_prompt}\n",
    "                ]\n",
    "            )\n",
    "            brochure = response['message']['content']\n",
    "\n",
    "        print(f\"✅ Brochure generated successfully!\")\n",
    "        return brochure\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error generating brochure: {e}\")\n",
    "        return f\"Error generating brochure: {e}\"\n",
    "\n",
    "def display_brochure(company_name, url, model=\"gpt-4o-mini\", style=\"professional\"):\n",
    "    \"\"\"Generate a brochure and render it as rich Markdown in the notebook.\"\"\"\n",
    "    brochure = create_company_brochure(company_name, url, model, style)\n",
    "    display(Markdown(f\"# {company_name} Brochure\\n\\n{brochure}\"))\n",
    "\n",
    "print(\"Professional brochure generation functions defined!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test Day 5 Solution - Business Brochure Generator\n",
    "print(\"## Day 5 Solution Test - Business Brochure Generator\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "# Test with different companies\n",
    "test_companies = [\n",
    "    (\"Hugging Face\", \"https://huggingface.co\"),\n",
    "    (\"OpenAI\", \"https://openai.com\"),\n",
    "    (\"Anthropic\", \"https://anthropic.com\")\n",
    "]\n",
    "\n",
    "print(\"🢠Testing brochure generation for different companies...\")\n",
    "\n",
    "for company_name, url in test_companies:\n",
    "    print(f\"\\n{'='*50}\")\n",
    "    print(f\"Testing: {company_name}\")\n",
    "    print(f\"URL: {url}\")\n",
    "    print('='*50)\n",
    "\n",
    "    try:\n",
    "        # Test with professional style\n",
    "        print(f\"\\n📄 Generating professional brochure for {company_name}...\")\n",
    "        display_brochure(company_name, url, model=MODEL_GPT, style=\"professional\")\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error with {company_name}: {e}\")\n",
    "\n",
    "    print(\"\\n\" + \"-\"*40)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}