LLM_Engineering_OLD/week1/community-contributions/ai-powered-marketing-brochures-gpt-5/ai-brochure-creator.py

from ai_core import AICore
from ai_brochure_config import AIBrochureConfig
from extractor_of_relevant_links import ExtractorOfRelevantLinks
from website import Website
from openai.types.responses import Response
from rich.console import Console
from rich.markdown import Markdown
from requests import Session
from concurrent.futures import ThreadPoolExecutor, as_completed
from json import loads

class BrochureCreator(AICore[str]):
    """
    Builds a short Markdown brochure for a company or individual by:
    - extracting relevant links from the website,
    - inferring the entity name and status,
    - and prompting the model using the collected page content.
    """

    @property
    def _website(self) -> Website:
        """Return the main Website instance to analyze."""
        return self.__website

    @property
    def _extractor(self) -> ExtractorOfRelevantLinks:
        """Return the helper responsible for extracting relevant links."""
        return self.__extractor

    def __init__(self, config: AIBrochureConfig, website: Website) -> None:
        """
        Initialize the brochure creator with configuration and target website.

        Parameters:
            config: AI and runtime configuration.
            website: The root website to analyze and summarize.
        """
        system_behavior: str = ("You are an assistant that analyzes the contents of several relevant pages from a company website "
                                "and creates a short brochure about the company for prospective customers, investors and recruits. "
                                "Include details of company culture, customers and careers/jobs if information is available. ")
        super().__init__(config, system_behavior)
        self.__website: Website = website
        self.__extractor: ExtractorOfRelevantLinks = ExtractorOfRelevantLinks(config, website)

    def create_brochure(self) -> str:
        """
        Create a short Markdown brochure based on the website's content.

        Returns:
            A Markdown string with the brochure, or a fallback message if no relevant pages were found.
        """
        relevant_pages: list[dict[str, str | Website]] = self._get_relevant_pages()
        if not relevant_pages:
            return "No relevant pages found to create a brochure."

        # The page excerpts are sent to the model as part of the entity-inference
        # question (see ask(), which records every question before calling the
        # model); the final prompt below refers back to them instead of
        # repeating them.
        brochure_prompt_part: str = self._form_brochure_prompt(relevant_pages)
        inferred_company_name, inferred_status = self._infer_entity(brochure_prompt_part)

        full_brochure_prompt: str = self._form_full_prompt(inferred_company_name, inferred_status)
        return self.ask(full_brochure_prompt)

    def _get_relevant_pages(self) -> list[dict[str, str | Website]]:
        """
        Resolve relevant links into Website objects using a shared session and concurrency.

        Returns:
            A list of {"type": <page type>, "page": <Website>} descriptors, in the
            same order as the extractor returned the links. Links that are
            malformed or whose Website could not be constructed are skipped.
            An empty list is returned when the extractor yields no usable links.
        """
        # Limit the number of pages to fetch to keep latency and token usage reasonable.
        MAX_PAGES: int = 6
        MAX_WORKERS: int = 4

        extracted = self._extractor.extract_relevant_links()
        try:
            candidate = extracted["links"]
        except (KeyError, TypeError):
            # Missing "links" key or a non-subscriptable result: treat as "no links".
            candidate = None
        relevant_links: list[dict[str, str]] = candidate if isinstance(candidate, list) else []

        links_subset = relevant_links[:MAX_PAGES]
        if not links_subset:
            return []

        def build_page(item: dict[str, str], session: Session) -> dict[str, str | Website] | None:
            # Deliberately best-effort: a malformed link entry or a failing page
            # construction must not abort the whole brochure, so the page is skipped.
            try:
                url = str(item["url"])
                page_type = str(item["type"])
                return {"type": page_type, "page": Website(url, session=session)}
            except Exception:
                return None

        with Session() as session, ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [executor.submit(build_page, link, session) for link in links_subset]
            # Collect in submission order (not completion order) so the prompt
            # built from these pages is deterministic across runs.
            results = [fut.result() for fut in futures]

        return [page for page in results if page]

    def _truncate_text(self, text: str, limit: int) -> str:
        """
        Truncate text to at most 'limit' characters to reduce tokens and latency.

        Parameters:
            text: The text to shorten.
            limit: Maximum number of characters in the result.

        Returns:
            The text unchanged if it already fits; otherwise a shortened prefix
            followed by a truncation marker. When 'limit' is too small to hold
            the marker, a plain prefix of at most 'limit' characters is returned.
        """
        TRUNCATION_MARKER: str = "... [truncated]"
        if len(text) <= limit:
            return text
        if limit < len(TRUNCATION_MARKER):
            # The marker alone would exceed the limit; fall back to a hard cut.
            return text[: max(0, limit)]
        return text[: max(0, limit - 20)] + TRUNCATION_MARKER

    def _form_brochure_prompt(self, relevant_pages: list[dict[str, str | Website]]) -> str:
        """
        Assemble a prompt that includes the main page and relevant pages' titles and text.

        Parameters:
            relevant_pages: List of page descriptors returned by _get_relevant_pages.

        Returns:
            A prompt string containing quoted sections per page. Pages whose
            fetch failed (or whose "page" entry is not a Website) are omitted.
        """
        QUOTE_DELIMITER: str = '\n"""\n'
        MAX_MAIN_CHARS = 6000
        MAX_PAGE_CHARS = 3000

        sections: list[str] = [
            f"Main page:{QUOTE_DELIMITER}"
            f"Title: {self._website.title}\n"
            f"Text:\n{self._truncate_text(self._website.text, MAX_MAIN_CHARS)}{QUOTE_DELIMITER}\n"
        ]

        for page in relevant_pages:
            site = page["page"]
            if not isinstance(site, Website) or site.fetch_failed:
                continue
            sections.append(
                f"{page['type']}:{QUOTE_DELIMITER}"
                f"Title: {site.title}\n"
                f"Text:\n{self._truncate_text(site.text, MAX_PAGE_CHARS)}{QUOTE_DELIMITER}\n"
            )

        return "".join(sections)

    @staticmethod
    def _extract_json_object(raw: str) -> str:
        """
        Return the outermost '{...}' span of 'raw', or 'raw' unchanged if there is none.

        This lets JSON wrapped in Markdown code fences or surrounded by prose
        still be parsed.
        """
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            return raw
        return raw[start : end + 1]

    def _infer_entity(self, brochure_prompt_part: str) -> tuple[str, str]:
        """
        Infer both the entity name and status in a single model call to reduce latency.

        Parameters:
            brochure_prompt_part: The page excerpts produced by _form_brochure_prompt.

        Returns:
            (name, status) where status is 'company' or 'individual'. If the
            model's answer is not a JSON object, the whole answer (stripped) is
            used as the name and the status defaults to 'company'. A missing or
            empty name becomes 'Unknown'; an unrecognized status becomes 'company'.
        """
        prompt = (
            "From the following website excerpts, infer the entity name and whether it is a company or an individual. "
            "Respond strictly as JSON with keys 'name' and 'status' (status must be 'company' or 'individual').\n"
            f"{brochure_prompt_part}"
        )
        raw = self.ask(prompt)

        try:
            data = loads(self._extract_json_object(raw))
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
            data = None

        if not isinstance(data, dict):
            # Fallback: use entire output as name, assume company
            return raw.strip() or "Unknown", "company"

        # `or ""` keeps a JSON null from being rendered as the string "None".
        name: str = str(data.get("name") or "").strip() or "Unknown"
        status: str = str(data.get("status") or "").strip().lower()
        if status not in ("company", "individual"):
            status = "company"
        return name, status

    def _form_full_prompt(self, inferred_company_name: str, inferred_status: str) -> str:
        """
        Build the final brochure-generation prompt using the inferred entity and prior history.

        Parameters:
            inferred_company_name: The inferred entity name.
            inferred_status: Either 'company' or 'individual'.

        Returns:
            A final prompt instructing the model to produce a Markdown brochure.
        """
        full_prompt: str = (f"You are looking at a {inferred_status} called {inferred_company_name}, to whom website {self._website.website_url} belongs.\n"
                            f"Build a short brochure about the {inferred_status}. Use the information from the website that is already stored in the history.\n"
                            "Your response must be in a Markdown format.")
        return full_prompt

    def ask(self, question: str) -> str:
        """
        Send a question to the model, update chat history, and return the text output.

        Parameters:
            question: The user prompt.

        Returns:
            The model output text.
        """
        # The question is recorded first so that it is part of the history
        # passed as the model input below.
        self.history_manager.add_user_message(question)
        response: Response = self._ai_api.responses.create(
            model=self.config.model_name,
            instructions=self.history_manager.system_behavior,
            input=self.history_manager.chat_history,
            reasoning={ "effort": "low" }
        )
        self.history_manager.add_assistant_message(response)
        return response.output_text

# Module-level rich console shared by display_markdown and show_summary.
console: Console = Console()

def display_markdown(content: str) -> None:
    """
    Print the given Markdown text to the module-level rich console.

    Parameters:
        content: Markdown source to render.
    """
    rendered = Markdown(content)
    console.print(rendered)

def show_summary(summary: str) -> None:
    """
    Render a summary as Markdown, or print a fallback line when it is empty.

    Parameters:
        summary: Markdown text of the summary; a falsy value triggers the fallback.
    """
    if not summary:
        console.print("No summary found.")
        return
    display_markdown(summary)

if __name__ == "__main__":
    # Script entry point: build a brochure for the configured site and render it.
    # Replace the placeholder below with the address of the site to analyze.
    target_site: Website = Website("<put your site address here>")
    creator: BrochureCreator = BrochureCreator(AIBrochureConfig(), target_site)
    display_markdown(creator.create_brochure())