ai-web-summarizer
33
week3/community-contributions/ai-web-summarizer/.gitignore
vendored
Normal file
@@ -0,0 +1,33 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyd
.Python
env/
venv/
*.env
*.ini
*.log

# VSCode
.vscode/

# IDE files
.idea/

# System files
.DS_Store
Thumbs.db

# Environment variables
.env

# Jupyter notebook checkpoints
.ipynb_checkpoints

# Dependencies
*.egg-info/
dist/
build/
143
week3/community-contributions/ai-web-summarizer/README.md
Normal file
@@ -0,0 +1,143 @@
# AI Web Page Summarizer

This project is a simple AI-powered web page summarizer that leverages OpenAI's GPT models and local inference with Ollama to generate concise summaries of a given text. The goal is to create a "Reader's Digest of the Internet" by summarizing web content efficiently.

## Features

- Summarize text using OpenAI's GPT models or local Ollama models.
- Flexible summarization engine selection (OpenAI API, Ollama API, or Ollama Python library).
- Simple and modular code structure.
- Error handling for better reliability.

## Project Structure

```
ai-summarizer/
│-- summarizer/
│   │-- __init__.py
│   │-- fetcher.py      # Web content fetching logic
│   │-- summarizer.py   # Main summarization logic
│-- utils/
│   │-- __init__.py
│   │-- config.py       # Environment configuration
│   │-- logger.py       # Logging configuration
│-- main.py             # Entry point of the app
│-- .env                # Environment variables
│-- requirements.txt    # Python dependencies
│-- README.md           # Project documentation
```

## Prerequisites

- Python 3.8 or higher
- An OpenAI API key (you can obtain one from [OpenAI](https://platform.openai.com/signup))
- Ollama installed locally ([Installation Guide](https://ollama.ai))
- `conda` for managing environments (optional)

## Installation

1. **Clone the repository:**

   ```bash
   git clone https://github.com/your-username/ai-summarizer.git
   cd ai-summarizer
   ```

2. **Create a virtual environment (optional but recommended):**

   ```bash
   conda create --name summarizer-env python=3.9
   conda activate summarizer-env
   ```

3. **Install dependencies:**

   ```bash
   pip install -r requirements.txt
   ```

4. **Set up environment variables:**

   Create a `.env` file in the project root and add your OpenAI API key (if using OpenAI):

   ```env
   OPENAI_API_KEY=your-api-key-here
   ```

## Usage

1. **Run the summarizer:**

   ```bash
   python main.py
   ```

2. **Sample output:**

   ```shell
   Enter a URL to summarize: https://example.com

   Summary of the page:

   AI refers to machines demonstrating intelligence similar to humans and animals.
   ```

3. **Engine selection:**

   The summarizer supports multiple engines. Modify `main.py` to select your preferred model:

   ```python
   summary = summarize_text(content, 'gpt-4o-mini', engine="openai")
   summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-api")
   summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-lib")
   ```

## Configuration

You can modify the model, max tokens, and temperature in `summarizer/summarizer.py`:

```python
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[...],
    max_tokens=300,
    temperature=0.7
)
```

## Error Handling

If any issue occurs, the script prints an error message, for example:

```
Error during summarization: Invalid API key or Ollama not running.
```
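Internally, each engine function in `summarizer/summarizer.py` follows the same pattern: wrap the API call in `try`/`except`, print the error, and return `None`, so callers only need a truthiness check. A minimal sketch of that pattern, using a hypothetical `broken_engine` stand-in instead of a real API call:

```python
def summarize_with(engine_call, text):
    """Run an engine call, returning None on any failure."""
    try:
        return engine_call(text)
    except Exception as e:
        print(f"Error during summarization: {e}")
        return None

def broken_engine(text):
    # Stand-in for an API call that fails (e.g. invalid key, Ollama not running).
    raise ConnectionError("Ollama not running")

summary = summarize_with(broken_engine, "some page text")
print(summary)  # → None (after printing the error message)
```

This is why `main.py` can simply test `if summary:` before printing the result.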

## Dependencies

The required dependencies are listed in `requirements.txt`:

```
openai
requests
beautifulsoup4
python-dotenv
```

The `ollama-lib` engine additionally requires the `ollama` Python package.

Install them using:

```bash
pip install -r requirements.txt
```

## Contributing

Contributions are welcome! Feel free to fork the repository and submit pull requests.

## License

This project is licensed under the MIT License. See the `LICENSE` file for more details.

## Contact

For any inquiries, please reach out to:

- LinkedIn: https://www.linkedin.com/in/khanarafat/
- GitHub: https://github.com/raoarafat
28
week3/community-contributions/ai-web-summarizer/main.py
Normal file
@@ -0,0 +1,28 @@
from summarizer.fetcher import fetch_web_content
from summarizer.summarizer import summarize_text
from utils.logger import logger

def main():
    url = input("Enter a URL to summarize: ")

    logger.info(f"Fetching content from: {url}")
    content = fetch_web_content(url)

    if content:
        logger.info("Content fetched successfully. Sending to the summarization engine...")
        # summary = summarize_text(content, 'gpt-4o-mini', engine="openai")
        # summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-lib")
        summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-api")

        if summary:
            logger.info("Summary generated successfully.")
            print("\nSummary of the page:\n")
            print(summary)
        else:
            logger.error("Failed to generate summary.")
    else:
        logger.error("Failed to fetch web content.")

if __name__ == "__main__":
    main()
4
week3/community-contributions/ai-web-summarizer/requirements.txt
Normal file
@@ -0,0 +1,4 @@
openai
requests
beautifulsoup4
python-dotenv
23
week3/community-contributions/ai-web-summarizer/summarizer/fetcher.py
Normal file
@@ -0,0 +1,23 @@
import requests
from bs4 import BeautifulSoup

def fetch_web_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts and styles, then extract readable text from the web page
        for tag in soup(['script', 'style']):
            tag.decompose()
        page_text = soup.get_text(separator=' ', strip=True)

        return page_text[:5000]  # Limit to 5000 chars (API limitation)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

if __name__ == "__main__":
    url = "https://en.wikipedia.org/wiki/Natural_language_processing"
    content = fetch_web_content(url)
    if content:
        print(content[:500])  # Print a sample of the content
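The `get_text(separator=' ', strip=True)` call in `fetcher.py` collapses a page's markup into a single whitespace-separated string. A quick offline illustration, using a small hypothetical HTML snippet in place of a fetched response body:

```python
from bs4 import BeautifulSoup

# Hypothetical markup standing in for a real page.
html = "<html><body><h1>Title</h1><p>Hello world.</p></body></html>"
soup = BeautifulSoup(html, "html.parser")

# Each text node is stripped, then joined with the separator.
print(soup.get_text(separator=' ', strip=True))  # → Title Hello world.
```

The 5000-character slice that follows in `fetcher.py` then keeps the prompt within the model's context budget.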
85
week3/community-contributions/ai-web-summarizer/summarizer/summarizer.py
Normal file
@@ -0,0 +1,85 @@
import openai  # type: ignore
import ollama
import requests

from utils.config import Config

# Local Ollama API endpoint
OLLAMA_API = "http://127.0.0.1:11434/api/chat"

# Initialize OpenAI client with API key
client = openai.Client(api_key=Config.OPENAI_API_KEY)

def summarize_with_openai(text, model):
    """Summarize text using OpenAI's GPT model."""
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
                {"role": "user", "content": f"Summarize the following text: {text}"}
            ],
            max_tokens=300,
            temperature=0.7
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error during OpenAI summarization: {e}")
        return None

def summarize_with_ollama_lib(text, model):
    """Summarize text using the Ollama Python library."""
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
            {"role": "user", "content": f"Summarize the following text: {text}"}
        ]
        response = ollama.chat(model=model, messages=messages)
        return response['message']['content']
    except Exception as e:
        print(f"Error during Ollama summarization: {e}")
        return None

def summarize_with_ollama_api(text, model):
    """Summarize text using the local Ollama API."""
    try:
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
                {"role": "user", "content": f"Summarize the following text: {text}"}
            ],
            "stream": False  # Set to True for streaming responses
        }
        response = requests.post(OLLAMA_API, json=payload)
        response.raise_for_status()
        response_data = response.json()
        return response_data.get('message', {}).get('content', 'No summary generated')
    except Exception as e:
        print(f"Error during Ollama API summarization: {e}")
        return None

def summarize_text(text, model, engine="openai"):
    """Generic function to summarize text using the specified engine (openai/ollama-lib/ollama-api)."""
    if engine == "openai":
        return summarize_with_openai(text, model)
    elif engine == "ollama-lib":
        return summarize_with_ollama_lib(text, model)
    elif engine == "ollama-api":
        return summarize_with_ollama_api(text, model)
    else:
        print("Invalid engine specified. Use 'openai', 'ollama-lib', or 'ollama-api'.")
        return None

if __name__ == "__main__":
    sample_text = "Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to the natural intelligence displayed by animals and humans."

    # Summarize using OpenAI
    openai_summary = summarize_text(sample_text, model="gpt-3.5-turbo", engine="openai")
    print("OpenAI Summary:", openai_summary)

    # Summarize using the Ollama Python library
    ollama_lib_summary = summarize_text(sample_text, model="deepseek-r1:1.5B", engine="ollama-lib")
    print("Ollama Library Summary:", ollama_lib_summary)

    # Summarize using the local Ollama API
    ollama_api_summary = summarize_text(sample_text, model="deepseek-r1:1.5B", engine="ollama-api")
    print("Ollama API Summary:", ollama_api_summary)
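The `summarize_text` dispatcher above maps an engine name to an engine function with an if/elif chain. The same behavior can be sketched with a dict-based lookup; this is a hypothetical alternative (with a stand-in lambda instead of a real API call), not the project's code:

```python
def make_summarizer(engines):
    """Build a summarize_text-style dispatcher from a name->function dict."""
    def summarize_text(text, model, engine="openai"):
        fn = engines.get(engine)
        if fn is None:
            print("Invalid engine specified. Use 'openai', 'ollama-lib', or 'ollama-api'.")
            return None
        return fn(text, model)
    return summarize_text

# Stand-in engine function for illustration only.
engines = {
    "openai": lambda text, model: f"[{model}] summary of {len(text)} chars",
}
summarize = make_summarizer(engines)

print(summarize("some page text", "gpt-4o-mini"))                  # → [gpt-4o-mini] summary of 14 chars
print(summarize("some page text", "gpt-4o-mini", engine="bogus"))  # prints the error message, then None
```

A dict keeps adding a new engine to a one-line change; the if/elif chain in the project reads equally well at this size.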
11
week3/community-contributions/ai-web-summarizer/utils/config.py
Normal file
@@ -0,0 +1,11 @@
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

class Config:
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if __name__ == "__main__":
    print("Your OpenAI Key is:", Config.OPENAI_API_KEY)
16
week3/community-contributions/ai-web-summarizer/utils/logger.py
Normal file
@@ -0,0 +1,16 @@
import logging

# Setup logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    logger.info("Logger is working correctly.")
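The format string in `logger.py` produces lines like `2025-01-01 12:00:00,000 - INFO - Logger is working correctly.` on both the console and `app.log`. A self-contained check of the same formatter, writing to an in-memory stream instead of a file and dropping the timestamp so the output is deterministic (the `demo` logger name is illustrative):

```python
import io
import logging

buf = io.StringIO()
handler = logging.StreamHandler(buf)
# Same format as logger.py, minus %(asctime)s.
handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))

demo_logger = logging.getLogger("demo")
demo_logger.addHandler(handler)
demo_logger.setLevel(logging.INFO)

demo_logger.info("Logger is working correctly.")
print(buf.getvalue().strip())  # → INFO - Logger is working correctly.
```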