ai-web-summarizer

33 week3/community-contributions/ai-web-summarizer/.gitignore vendored Normal file
@@ -0,0 +1,33 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyd
.Python
env/
venv/
*.env
*.ini
*.log

# VSCode
.vscode/

# IDE files
.idea/

# System files
.DS_Store
Thumbs.db

# Environment variables
.env

# Jupyter notebook checkpoints
.ipynb_checkpoints

# Dependencies
*.egg-info/
dist/
build/
143 week3/community-contributions/ai-web-summarizer/README.md Normal file
@@ -0,0 +1,143 @@
# AI Web Page Summarizer

This project is a simple AI-powered web page summarizer that leverages OpenAI's GPT models and local inference with Ollama to generate concise summaries of given text. The goal is to create a "Reader's Digest of the Internet" by summarizing web content efficiently.

## Features

- Summarize text using OpenAI's GPT models or local Ollama models.
- Flexible summarization engine selection (OpenAI API, Ollama API, or Ollama library).
- Simple and modular code structure.
- Error handling for better reliability.

## Project Structure

```
ai-summarizer/
│-- summarizer/
│   │-- __init__.py
│   │-- fetcher.py       # Web content fetching logic
│   │-- summarizer.py    # Main summarization logic
│-- utils/
│   │-- __init__.py
│   │-- config.py        # Environment variable loading
│   │-- logger.py        # Logging configuration
│-- main.py              # Entry point of the app
│-- .env                 # Environment variables
│-- requirements.txt     # Python dependencies
│-- README.md            # Project documentation
```

## Prerequisites

- Python 3.8 or higher
- OpenAI API key (you can obtain one from [OpenAI](https://platform.openai.com/signup))
- Ollama installed locally ([Installation Guide](https://ollama.ai))
- `conda` for managing environments (optional)

## Installation

1. **Clone the repository:**

   ```bash
   git clone https://github.com/your-username/ai-summarizer.git
   cd ai-summarizer
   ```

2. **Create a virtual environment (optional but recommended):**

   ```bash
   conda create --name summarizer-env python=3.9
   conda activate summarizer-env
   ```

3. **Install dependencies:**

   ```bash
   pip install -r requirements.txt
   ```

4. **Set up environment variables:**

   Create a `.env` file in the project root and add your OpenAI API key (if using OpenAI):

   ```env
   OPENAI_API_KEY=your-api-key-here
   ```

## Usage

1. **Run the summarizer:**

   ```bash
   python main.py
   ```

2. **Sample Output:**

   ```shell
   Enter a URL to summarize: https://example.com
   Summary of the page:
   AI refers to machines demonstrating intelligence similar to humans and animals.
   ```

3. **Engine Selection:**

   The summarizer supports multiple engines. Modify `main.py` to select your preferred model:

   ```python
   summary = summarize_text(content, 'gpt-4o-mini', engine="openai")
   summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-api")
   summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-lib")
   ```

## Configuration

You can modify the model, max tokens, and temperature in `summarizer/summarizer.py`:

```python
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[...],
    max_tokens=300,
    temperature=0.7
)
```

## Error Handling

If any issue occurs, the script prints an error message, for example:

```
Error during summarization: Invalid API key or Ollama not running.
```
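
All three engine helpers follow the same convention: they print the error and return `None`, so callers only ever need a `None` check. A minimal self-contained sketch of that pattern (`risky_summarize` is a hypothetical stand-in, not a function in this repo):

```python
def risky_summarize(text):
    """Hypothetical stand-in for an engine helper: print the error, return None."""
    try:
        if not text:
            raise ValueError("Invalid API key or Ollama not running.")
        return text[:40]  # pretend the model produced a summary
    except Exception as e:
        print(f"Error during summarization: {e}")
        return None

summary = risky_summarize("")
if summary is None:
    print("Failed to generate summary.")
```

Because failures never raise past the helper, `main.py` stays a simple chain of `if content:` / `if summary:` branches.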

## Dependencies

The required dependencies are listed in `requirements.txt`:

```
openai
requests
beautifulsoup4
python-dotenv
ollama
```

Install them using:

```bash
pip install -r requirements.txt
```

## Contributing

Contributions are welcome! Feel free to fork the repository and submit pull requests.

## License

This project is licensed under the MIT License. See the `LICENSE` file for more details.

## Contact

For any inquiries, please reach out to:

- LinkedIn: https://www.linkedin.com/in/khanarafat/
- GitHub: https://github.com/raoarafat
28 week3/community-contributions/ai-web-summarizer/main.py Normal file
@@ -0,0 +1,28 @@
from summarizer.fetcher import fetch_web_content
from summarizer.summarizer import summarize_text
from utils.logger import logger

def main():
    url = input("Enter a URL to summarize: ")

    logger.info(f"Fetching content from: {url}")
    content = fetch_web_content(url)

    if content:
        logger.info("Content fetched successfully. Sending to the summarization engine...")
        # summary = summarize_text(content, 'gpt-4o-mini', engine="openai")
        # summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-lib")
        summary = summarize_text(content, 'deepseek-r1:1.5B', engine="ollama-api")

        if summary:
            logger.info("Summary generated successfully.")
            print("\nSummary of the page:\n")
            print(summary)
        else:
            logger.error("Failed to generate summary.")
    else:
        logger.error("Failed to fetch web content.")

if __name__ == "__main__":
    main()
@@ -0,0 +1,5 @@
openai
requests
beautifulsoup4
python-dotenv
ollama
@@ -0,0 +1,28 @@
import requests
from bs4 import BeautifulSoup

def fetch_web_content(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts and styles so only readable text remains
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Extract readable text from the web page
        page_text = soup.get_text(separator=' ', strip=True)

        return page_text[:5000]  # Limit to 5000 chars (API limitation)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

if __name__ == "__main__":
    url = "https://en.wikipedia.org/wiki/Natural_language_processing"
    content = fetch_web_content(url)
    if content:
        print(content[:500])  # Print a sample of the content
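
The extraction step can be exercised offline. One caveat worth showing: `get_text()` alone keeps the contents of `<script>` and `<style>` tags, so those must be removed first. A small sketch (the sample HTML is made up):

```python
# Offline sketch of the extraction step in fetch_web_content. get_text()
# alone would keep "tracker()", so script/style tags are dropped first.
from bs4 import BeautifulSoup

html = "<html><body><h1>Title</h1><p>First paragraph.</p><script>tracker()</script></body></html>"
soup = BeautifulSoup(html, "html.parser")
for tag in soup(["script", "style"]):
    tag.decompose()
text = soup.get_text(separator=" ", strip=True)
print(text)  # Title First paragraph.
```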
@@ -0,0 +1,85 @@
import openai  # type: ignore
import ollama
import requests
from utils.config import Config

# Local Ollama API endpoint
OLLAMA_API = "http://127.0.0.1:11434/api/chat"

# Initialize OpenAI client with API key
client = openai.Client(api_key=Config.OPENAI_API_KEY)

def summarize_with_openai(text, model):
    """Summarize text using OpenAI's GPT model."""
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
                {"role": "user", "content": f"Summarize the following text: {text}"}
            ],
            max_tokens=300,
            temperature=0.7
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error during OpenAI summarization: {e}")
        return None

def summarize_with_ollama_lib(text, model):
    """Summarize text using the Ollama Python library."""
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
            {"role": "user", "content": f"Summarize the following text: {text}"}
        ]
        response = ollama.chat(model=model, messages=messages)
        return response['message']['content']
    except Exception as e:
        print(f"Error during Ollama summarization: {e}")
        return None

def summarize_with_ollama_api(text, model):
    """Summarize text using the local Ollama API."""
    try:
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that summarizes web pages."},
                {"role": "user", "content": f"Summarize the following text: {text}"}
            ],
            "stream": False  # Set to True for streaming responses
        }
        response = requests.post(OLLAMA_API, json=payload)
        response_data = response.json()
        return response_data.get('message', {}).get('content', 'No summary generated')
    except Exception as e:
        print(f"Error during Ollama API summarization: {e}")
        return None

def summarize_text(text, model, engine="openai"):
    """Generic function to summarize text using the specified engine (openai/ollama-lib/ollama-api)."""
    if engine == "openai":
        return summarize_with_openai(text, model)
    elif engine == "ollama-lib":
        return summarize_with_ollama_lib(text, model)
    elif engine == "ollama-api":
        return summarize_with_ollama_api(text, model)
    else:
        print("Invalid engine specified. Use 'openai', 'ollama-lib', or 'ollama-api'.")
        return None

if __name__ == "__main__":
    sample_text = "Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to the natural intelligence displayed by animals and humans."

    # Summarize using OpenAI
    openai_summary = summarize_text(sample_text, model="gpt-3.5-turbo", engine="openai")
    print("OpenAI Summary:", openai_summary)

    # Summarize using Ollama Python library
    ollama_lib_summary = summarize_text(sample_text, model="deepseek-r1:1.5B", engine="ollama-lib")
    print("Ollama Library Summary:", ollama_lib_summary)

    # Summarize using local Ollama API
    ollama_api_summary = summarize_text(sample_text, model="deepseek-r1:1.5B", engine="ollama-api")
    print("Ollama API Summary:", ollama_api_summary)
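
The OpenAI and Ollama paths build the same chat payload shape. A small helper (hypothetical, not part of this repo) makes that shared shape explicit and is easy to unit-test without any server running:

```python
# Hypothetical helper (not in the repo): builds the chat payload that
# summarize_with_ollama_api posts to the local Ollama endpoint.
SYSTEM_PROMPT = "You are a helpful assistant that summarizes web pages."

def build_chat_payload(text, model, stream=False):
    return {
        "model": model,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Summarize the following text: {text}"},
        ],
        "stream": stream,  # True makes Ollama return incremental JSON chunks
    }

payload = build_chat_payload("AI is intelligence demonstrated by machines.", "deepseek-r1:1.5B")
print(payload["model"])  # deepseek-r1:1.5B
```

Factoring the payload out this way would also let `summarize_with_openai` and `summarize_with_ollama_api` share one message-building code path.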
@@ -0,0 +1,11 @@
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

class Config:
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if __name__ == "__main__":
    print("Your OpenAI Key is:", Config.OPENAI_API_KEY)
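
`Config.OPENAI_API_KEY` silently becomes `None` when the variable is unset, and the failure only surfaces later as an OpenAI error. A hedged sketch of a stricter loader (the name `require_env` is hypothetical) that fails fast instead:

```python
import os

def require_env(name):
    """Return the environment variable's value, or raise immediately if unset."""
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value

os.environ["DEMO_OPENAI_KEY"] = "sk-demo"  # stand-in value for the sketch
print(require_env("DEMO_OPENAI_KEY"))  # sk-demo
```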
@@ -0,0 +1,16 @@
import logging

# Setup logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    logger.info("Logger is working correctly.")