sach91 bootcamp week8 exercise

community-contributions/sach91-bootcamp/week8/app.py (new file, 846 lines added)
@@ -0,0 +1,846 @@
"""
KnowledgeHub - Personal Knowledge Management & Research Assistant
Main Gradio Application
"""
import os
import logging
import json
import gradio as gr
from pathlib import Path
import chromadb
from datetime import datetime

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Import utilities and agents
from utils import OllamaClient, EmbeddingModel, DocumentParser
from agents import (
    IngestionAgent, QuestionAgent, SummaryAgent,
    ConnectionAgent, ExportAgent
)
from models import Document
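
# The local modules above belong to this week8 exercise. Judging purely from
# how they are used in this file, the assumed contracts are roughly (a sketch,
# not the actual implementations):
#
#   OllamaClient(model).check_connection() -> bool, shared by all agents
#   EmbeddingModel() produces embeddings for ChromaDB storage and queries
#   Each *Agent(...).process(...) performs that agent's single responsibility
#   Document carries id, filename, filepath, content, chunks, metadata,
#   created_at, plus to_dict() for JSON persistence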

# Constants
VECTORSTORE_PATH = "./vectorstore"
TEMP_UPLOAD_PATH = "./temp_uploads"
DOCUMENTS_METADATA_PATH = "./vectorstore/documents_metadata.json"

# Ensure directories exist
os.makedirs(VECTORSTORE_PATH, exist_ok=True)
os.makedirs(TEMP_UPLOAD_PATH, exist_ok=True)


class KnowledgeHub:
    """Main application class managing all agents"""

    def __init__(self):
        logger.info("Initializing KnowledgeHub...")

        # Initialize ChromaDB
        self.client = chromadb.PersistentClient(path=VECTORSTORE_PATH)
        self.collection = self.client.get_or_create_collection(
            name="knowledge_base",
            metadata={"description": "Personal knowledge management collection"}
        )
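
        # get_or_create_collection is idempotent: on restart it reattaches to
        # the existing "knowledge_base" collection on disk instead of creating
        # a new one, which is what makes the vector store persistent.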

        # Initialize embedding model
        self.embedding_model = EmbeddingModel()

        # Initialize shared LLM client
        self.llm_client = OllamaClient(model="llama3.2")

        # Check Ollama connection
        if not self.llm_client.check_connection():
            logger.warning("⚠️ Cannot connect to Ollama. Please ensure Ollama is running.")
            logger.warning("Start Ollama with: ollama serve")
        else:
            logger.info("✓ Connected to Ollama")

        # Initialize agents
        self.ingestion_agent = IngestionAgent(
            collection=self.collection,
            embedding_model=self.embedding_model,
            llm_client=self.llm_client
        )

        self.question_agent = QuestionAgent(
            collection=self.collection,
            embedding_model=self.embedding_model,
            llm_client=self.llm_client
        )
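
        # The QuestionAgent drives the RAG loop behind the "Ask Questions"
        # tab. A sketch of the assumed flow (the real logic lives in agents;
        # the embed/generate names are illustrative):
        #
        #   query_emb = embedding_model.embed(question)
        #   hits = collection.query(query_embeddings=[query_emb], n_results=top_k)
        #   context = "\n\n".join(hits["documents"][0])
        #   answer = llm_client.generate(f"Context:\n{context}\n\nQ: {question}")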

        self.summary_agent = SummaryAgent(
            collection=self.collection,
            llm_client=self.llm_client
        )

        self.connection_agent = ConnectionAgent(
            collection=self.collection,
            embedding_model=self.embedding_model,
            llm_client=self.llm_client
        )

        self.export_agent = ExportAgent(
            llm_client=self.llm_client
        )

        # Track uploaded documents
        self.documents = {}

        # Load existing documents from metadata file
        self._load_documents_metadata()

        logger.info("✓ KnowledgeHub initialized successfully")

    def _save_documents_metadata(self):
        """Save document metadata to JSON file"""
        try:
            metadata = {
                doc_id: doc.to_dict()
                for doc_id, doc in self.documents.items()
            }

            with open(DOCUMENTS_METADATA_PATH, 'w') as f:
                json.dump(metadata, f, indent=2)

            logger.debug(f"Saved metadata for {len(metadata)} documents")
        except Exception as e:
            logger.error(f"Error saving document metadata: {e}")

    def _load_documents_metadata(self):
        """Load document metadata from JSON file"""
        try:
            if os.path.exists(DOCUMENTS_METADATA_PATH):
                with open(DOCUMENTS_METADATA_PATH, 'r') as f:
                    metadata = json.load(f)

                # Reconstruct Document objects (simplified - without chunks)
                for doc_id, doc_data in metadata.items():
                    # Create a minimal Document object for UI purposes;
                    # the full chunks are still in ChromaDB
                    doc = Document(
                        id=doc_id,
                        filename=doc_data['filename'],
                        filepath=doc_data.get('filepath', ''),
                        content=doc_data.get('content', ''),
                        chunks=[],  # Chunks are in ChromaDB
                        metadata=doc_data.get('metadata', {}),
                        created_at=datetime.fromisoformat(doc_data['created_at'])
                    )
                    self.documents[doc_id] = doc

                logger.info(f"✓ Loaded {len(self.documents)} existing documents from storage")
            else:
                logger.info("No existing documents found (starting fresh)")

        except Exception as e:
            logger.error(f"Error loading document metadata: {e}")
            logger.info("Starting with empty document list")

    def upload_document(self, files, progress=gr.Progress()):
        """Handle document upload - supports single or multiple files with progress tracking"""
        if files is None or len(files) == 0:
            return "⚠️ Please select file(s) to upload", "", []

        # Convert single file to list for consistent handling
        if not isinstance(files, list):
            files = [files]
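
        # Depending on the Gradio version, each entry is either a
        # tempfile-style wrapper exposing .name (and sometimes .read()) or a
        # path-like string subclass; the copy logic below handles both.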

        results = []
        successful = 0
        failed = 0
        total_chunks = 0
        first_document = None

        # Initialize progress tracking
        progress(0, desc="Starting upload...")

        for file_idx, file in enumerate(files, 1):
            # Update progress
            progress_pct = (file_idx - 1) / len(files)
            progress(progress_pct, desc=f"Processing {file_idx}/{len(files)}: {Path(file.name).name}")

            try:
                logger.info(f"Processing file {file_idx}/{len(files)}: {file.name}")

                # Save uploaded file temporarily
                temp_path = os.path.join(TEMP_UPLOAD_PATH, Path(file.name).name)

                # Copy file content (without leaking a handle on the source)
                if hasattr(file, 'read'):
                    data = file.read()
                else:
                    with open(file.name, 'rb') as src:
                        data = src.read()
                with open(temp_path, 'wb') as f:
                    f.write(data)

                # Process document
                document = self.ingestion_agent.process(temp_path)

                # Store document reference
                self.documents[document.id] = document
                if first_document is None:
                    first_document = document

                # Track stats
                successful += 1
                total_chunks += document.num_chunks

                # Add to results
                results.append({
                    'status': '✅',
                    'filename': document.filename,
                    'chunks': document.num_chunks,
                    'size': f"{document.total_chars:,} chars"
                })

                # Clean up temp file
                os.remove(temp_path)

            except Exception as e:
                logger.error(f"Error processing {file.name}: {e}")
                failed += 1
                results.append({
                    'status': '❌',
                    'filename': Path(file.name).name,
                    'chunks': 0,
                    'size': f"Error: {str(e)[:50]}"
                })

        # Final progress update
        progress(1.0, desc="Upload complete!")

        # Save metadata once after all uploads
        if successful > 0:
            self._save_documents_metadata()

        # Create summary
        summary = f"""## Upload Complete! 🎉

**Total Files:** {len(files)}
**✅ Successful:** {successful}
**❌ Failed:** {failed}
**Total Chunks Created:** {total_chunks:,}

{f"⚠️ **{failed} file(s) failed** - Check results table below for details" if failed > 0 else "All files processed successfully!"}
"""

        # Create detailed results table
        results_table = [[r['status'], r['filename'], r['chunks'], r['size']] for r in results]

        # Preview the first successfully uploaded document
        preview = ""
        if first_document is not None:
            content = first_document.content
            preview = content[:500] + "..." if len(content) > 500 else content

        return summary, preview, results_table

    def ask_question(self, question, top_k, progress=gr.Progress()):
        """Handle question answering with progress tracking"""
        if not question.strip():
            return "⚠️ Please enter a question", [], ""

        try:
            progress(0, desc="Processing your question...")
            logger.info(f"Answering question: {question[:100]}")

            progress(0.3, desc="Finding relevant documents...")
            result = self.question_agent.process(question, top_k=top_k)

            progress(0.7, desc="Generating answer with LLM...")

            # Format answer
            answer = f"""### Answer\n\n{result['answer']}\n\n"""

            if result['sources']:
                answer += f"**Sources:** {result['num_sources']} documents referenced\n\n"

            # Format sources for display
            sources_data = []
            for i, source in enumerate(result['sources'], 1):
                sources_data.append([
                    i,
                    source['document'],
                    f"{source['score']:.2%}",
                    source['preview']
                ])

            progress(1.0, desc="Answer ready!")

            return answer, sources_data, "✅ Answer generated successfully!"

        except Exception as e:
            logger.error(f"Error answering question: {e}")
            return f"❌ Error: {str(e)}", [], f"❌ Error: {str(e)}"

    def create_summary(self, doc_selector, progress=gr.Progress()):
        """Create document summary with progress tracking"""
        if not doc_selector:
            return "", "⚠️ Please select a document to summarize"

        try:
            progress(0, desc="Preparing to summarize...")

            logger.info(f"doc_selector: {doc_selector}")
            # Dropdown entries are "filename -|- id" (see get_document_list)
            doc_id = doc_selector.split(" -|- ")[1]
            document = self.documents.get(doc_id)

            if not document:
                return "", "❌ Document not found"

            progress(0.3, desc=f"Analyzing {document.filename}...")
            logger.info(f"Creating summary for: {document.filename}")

            # Generate summary (may take 10-20 seconds with a local LLM)
            summary = self.summary_agent.process(
                document_id=doc_id,
                document_name=document.filename
            )

            progress(1.0, desc="Summary complete!")

            # Format result
            result = f"""## Summary of {summary.document_name}\n\n{summary.summary_text}\n\n"""

            if summary.key_points:
                result += "### Key Points\n\n"
                for point in summary.key_points:
                    result += f"- {point}\n"

            return result, "✅ Summary generated successfully!"

        except Exception as e:
            logger.error(f"Error creating summary: {e}")
            return "", f"❌ Error: {str(e)}"

    def find_connections(self, doc_selector, top_k, progress=gr.Progress()):
        """Find related documents with progress tracking"""
        if not doc_selector:
            return "⚠️ Please select a document", [], ""

        try:
            progress(0, desc="Preparing to find connections...")

            doc_id = doc_selector.split(" -|- ")[1]
            document = self.documents.get(doc_id)

            if not document:
                return "❌ Document not found", [], "❌ Document not found"

            progress(0.3, desc=f"Analyzing {document.filename}...")
            logger.info(f"Finding connections for: {document.filename}")

            result = self.connection_agent.process(document_id=doc_id, top_k=top_k)

            progress(0.8, desc="Calculating similarity scores...")

            if 'error' in result:
                return f"❌ Error: {result['error']}", [], f"❌ Error: {result['error']}"

            message = f"""## Related Documents\n\n**Source:** {result['source_document']}\n\n"""
            message += f"**Found {result['num_related']} related documents:**\n\n"

            # Format for table
            table_data = []
            for i, rel in enumerate(result['related'], 1):
                table_data.append([
                    i,
                    rel['document_name'],
                    f"{rel['similarity']:.2%}",
                    rel['preview']
                ])

            progress(1.0, desc="Connections found!")

            return message, table_data, "✅ Related documents found!"

        except Exception as e:
            logger.error(f"Error finding connections: {e}")
            return f"❌ Error: {str(e)}", [], f"❌ Error: {str(e)}"

    def export_knowledge(self, format_choice):
        """Export knowledge base"""
        try:
            logger.info(f"Exporting as {format_choice}")

            # Get statistics
            stats = self.ingestion_agent.get_statistics()

            # Create export content
            content = {
                'title': 'Knowledge Base Export',
                'summary': f"Total documents in knowledge base: {len(self.documents)}",
                'sections': [
                    {
                        'title': 'Documents',
                        'content': '\n'.join([f"- {doc.filename}" for doc in self.documents.values()])
                    },
                    {
                        'title': 'Statistics',
                        'content': f"Total chunks stored: {stats['total_chunks']}"
                    }
                ]
            }

            # Map the UI choice to the export format and file extension
            format_map = {
                "Markdown": ("markdown", "md"),
                "HTML": ("html", "html"),
                "Text": ("text", "txt"),
            }
            export_format, extension = format_map.get(format_choice, ("text", "txt"))

            output = self.export_agent.process(content, format=export_format)
            filename = f"knowledge_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{extension}"

            # Save file
            export_path = os.path.join(TEMP_UPLOAD_PATH, filename)
            with open(export_path, 'w', encoding='utf-8') as f:
                f.write(output)

            return f"✅ Exported as {format_choice}", export_path

        except Exception as e:
            logger.error(f"Error exporting: {e}")
            return f"❌ Error: {str(e)}", None

    def get_statistics(self):
        """Get knowledge base statistics"""
        try:
            stats = self.ingestion_agent.get_statistics()

            total_docs = len(self.documents)
            total_chunks = stats.get('total_chunks', 0)
            total_chars = sum(doc.total_chars for doc in self.documents.values())

            # Check if data is persisted
            persistence_status = "✅ Enabled" if os.path.exists(DOCUMENTS_METADATA_PATH) else "⚠️ Not configured"
            vectorstore_size = self._get_directory_size(VECTORSTORE_PATH)

            stats_text = f"""## Knowledge Base Statistics

**Persistence Status:** {persistence_status}
**Total Documents:** {total_docs}
**Total Chunks:** {total_chunks:,}
**Total Characters:** {total_chars:,}
**Vector Store Size:** {vectorstore_size}

### Storage Locations
- **Vector DB:** `{VECTORSTORE_PATH}/`
- **Metadata:** `{DOCUMENTS_METADATA_PATH}`

**📝 Note:** Your data persists across app restarts!

**Recent Documents:**
"""
            if self.documents:
                stats_text += "\n".join([f"- {doc.filename} ({doc.num_chunks} chunks, added {doc.created_at.strftime('%Y-%m-%d')})"
                                         for doc in list(self.documents.values())[-10:]])
            else:
                stats_text += "\n*No documents yet. Upload some to get started!*"

            return stats_text

        except Exception as e:
            return f"❌ Error: {str(e)}"

    def _get_directory_size(self, path):
        """Calculate directory size in human-readable form"""
        try:
            total_size = 0
            for dirpath, dirnames, filenames in os.walk(path):
                for filename in filenames:
                    filepath = os.path.join(dirpath, filename)
                    if os.path.exists(filepath):
                        total_size += os.path.getsize(filepath)

            # Convert to human readable
            for unit in ['B', 'KB', 'MB', 'GB']:
                if total_size < 1024.0:
                    return f"{total_size:.1f} {unit}"
                total_size /= 1024.0
            return f"{total_size:.1f} TB"
        except Exception:
            return "Unknown"

    def get_document_list(self):
        """Get list of documents for dropdown"""
        new_choices = [f"{doc.filename} -|- {doc.id}" for doc in self.documents.values()]
        return gr.update(choices=new_choices, value=None)
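
    # Note: every dropdown choice encodes the document id after the " -|- "
    # separator, so any handler receiving a selection must split on " -|- "
    # and take index 1 to recover the id, as create_summary and
    # find_connections do above and delete_document does below.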

    def delete_document(self, doc_selector):
        """Delete a document from the knowledge base"""
        if not doc_selector:
            return "⚠️ Please select a document to delete", self.get_document_list()

        try:
            # Choices are "filename -|- id", so the id is the second field
            doc_id = doc_selector.split(" -|- ")[1]
            document = self.documents.get(doc_id)

            if not document:
                return "❌ Document not found", self.get_document_list()

            # Delete from ChromaDB
            success = self.ingestion_agent.delete_document(doc_id)

            if success:
                # Remove from documents dict
                filename = document.filename
                del self.documents[doc_id]

                # Save updated metadata
                self._save_documents_metadata()

                return f"✅ Deleted: {filename}", self.get_document_list()
            else:
                return "❌ Error deleting document", self.get_document_list()

        except Exception as e:
            logger.error(f"Error deleting document: {e}")
            return f"❌ Error: {str(e)}", self.get_document_list()

    def clear_all_documents(self):
        """Clear entire knowledge base"""
        try:
            # Delete collection
            self.client.delete_collection("knowledge_base")

            # Recreate empty collection
            self.collection = self.client.create_collection(
                name="knowledge_base",
                metadata={"description": "Personal knowledge management collection"}
            )

            # Point the agents at the new collection object; the old
            # reference is invalid after delete_collection
            self.ingestion_agent.collection = self.collection
            self.question_agent.collection = self.collection
            self.summary_agent.collection = self.collection
            self.connection_agent.collection = self.collection

            # Clear documents
            self.documents = {}
            self._save_documents_metadata()

            return "✅ All documents cleared from knowledge base"

        except Exception as e:
            logger.error(f"Error clearing database: {e}")
            return f"❌ Error: {str(e)}"


def create_ui():
    """Create Gradio interface"""

    # Initialize app
    app = KnowledgeHub()

    # Custom CSS
    custom_css = """
    .main-header {
        text-align: center;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 30px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .stat-box {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 8px;
        border-left: 4px solid #667eea;
    }
    """

    with gr.Blocks(title="KnowledgeHub", css=custom_css, theme=gr.themes.Soft()) as interface:

        # Header
        gr.HTML("""
            <div class="main-header">
                <h1>🧠 KnowledgeHub</h1>
                <p>Personal Knowledge Management & Research Assistant</p>
                <p style="font-size: 14px; opacity: 0.9;">
                    Powered by Ollama (Llama 3.2) • Fully Local & Private
                </p>
            </div>
        """)

        # Main tabs
        with gr.Tabs():

            # Tab 1: Upload Documents
            with gr.Tab("📤 Upload Documents"):
                gr.Markdown("### Upload your documents to build your knowledge base")
                gr.Markdown("*Supported formats: PDF, DOCX, TXT, MD, HTML, PY*")
                gr.Markdown("*💡 Tip: You can select multiple files at once!*")

                with gr.Row():
                    with gr.Column():
                        file_input = gr.File(
                            label="Select Document(s)",
                            file_types=[".pdf", ".docx", ".txt", ".md", ".html", ".py"],
                            file_count="multiple"  # Enable multiple file selection
                        )
                        upload_btn = gr.Button("📤 Upload & Process", variant="primary")

                    with gr.Column():
                        upload_status = gr.Markdown("Ready to upload documents")

                # Results table for batch uploads
                with gr.Row():
                    upload_results = gr.Dataframe(
                        headers=["Status", "Filename", "Chunks", "Size"],
                        label="Upload Results",
                        wrap=True,
                        visible=True
                    )

                with gr.Row():
                    document_preview = gr.Textbox(
                        label="Document Preview (First Uploaded)",
                        lines=10,
                        max_lines=15
                    )

                upload_btn.click(
                    fn=app.upload_document,
                    inputs=[file_input],
                    outputs=[upload_status, document_preview, upload_results]
                )

            # Tab 2: Ask Questions
            with gr.Tab("❓ Ask Questions"):
                gr.Markdown("### Ask questions about your documents")
                gr.Markdown("*Uses RAG (Retrieval Augmented Generation) to answer based on your knowledge base*")

                with gr.Row():
                    with gr.Column(scale=3):
                        question_input = gr.Textbox(
                            label="Your Question",
                            placeholder="What would you like to know?",
                            lines=3
                        )

                    with gr.Column(scale=1):
                        top_k_slider = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=5,
                            step=1,
                            label="Number of sources"
                        )
                        ask_btn = gr.Button("🔍 Ask", variant="primary")

                qa_status = gr.Markdown("Ready to answer questions")
                answer_output = gr.Markdown(label="Answer")

                sources_table = gr.Dataframe(
                    headers=["#", "Document", "Relevance", "Preview"],
                    label="Sources",
                    wrap=True
                )

                ask_btn.click(
                    fn=app.ask_question,
                    inputs=[question_input, top_k_slider],
                    outputs=[answer_output, sources_table, qa_status]
                )

            # Tab 3: Summarize
            with gr.Tab("📝 Summarize"):
                gr.Markdown("### Generate summaries and extract key points")

                with gr.Row():
                    with gr.Column():
                        doc_selector = gr.Dropdown(
                            choices=[],
                            label="Select Document",
                            info="Choose a document to summarize",
                            allow_custom_value=True
                        )
                        refresh_btn = gr.Button("🔄 Refresh List")
                        summarize_btn = gr.Button("📝 Generate Summary", variant="primary")
                        summary_status = gr.Markdown("Ready to generate summaries")

                    with gr.Column(scale=2):
                        summary_output = gr.Markdown(label="Summary")

                summarize_btn.click(
                    fn=app.create_summary,
                    inputs=[doc_selector],
                    outputs=[summary_output, summary_status]
                )

                refresh_btn.click(
                    fn=app.get_document_list,
                    outputs=[doc_selector]
                )

            # Tab 4: Find Connections
            with gr.Tab("🔗 Find Connections"):
                gr.Markdown("### Discover relationships between documents")

                with gr.Row():
                    with gr.Column():
                        conn_doc_selector = gr.Dropdown(
                            choices=[],
                            label="Select Document",
                            info="Find documents related to this one",
                            allow_custom_value=True
                        )
                        conn_top_k = gr.Slider(
                            minimum=1,
                            maximum=10,
                            value=5,
                            step=1,
                            label="Number of related documents"
                        )
                        refresh_conn_btn = gr.Button("🔄 Refresh List")
                        find_btn = gr.Button("🔗 Find Connections", variant="primary")
                        connection_status = gr.Markdown("Ready to find connections")

                connection_output = gr.Markdown(label="Connections")

                connections_table = gr.Dataframe(
                    headers=["#", "Document", "Similarity", "Preview"],
                    label="Related Documents",
                    wrap=True
                )

                find_btn.click(
                    fn=app.find_connections,
                    inputs=[conn_doc_selector, conn_top_k],
                    outputs=[connection_output, connections_table, connection_status]
                )

                refresh_conn_btn.click(
                    fn=app.get_document_list,
                    outputs=[conn_doc_selector]
                )

            # Tab 5: Export
            with gr.Tab("💾 Export"):
                gr.Markdown("### Export your knowledge base")

                with gr.Row():
                    with gr.Column():
                        format_choice = gr.Radio(
                            choices=["Markdown", "HTML", "Text"],
                            value="Markdown",
                            label="Export Format"
                        )
                        export_btn = gr.Button("💾 Export", variant="primary")

                    with gr.Column():
                        export_status = gr.Markdown("Ready to export")
                        export_file = gr.File(label="Download Export")

                export_btn.click(
                    fn=app.export_knowledge,
                    inputs=[format_choice],
                    outputs=[export_status, export_file]
                )

            # Tab 6: Manage Documents
            with gr.Tab("🗂️ Manage Documents"):
                gr.Markdown("### Manage your document library")

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("#### Delete Document")
                        delete_doc_selector = gr.Dropdown(
                            choices=[],
                            label="Select Document to Delete",
                            info="Choose a document to remove from knowledge base"
                        )
                        with gr.Row():
                            refresh_delete_btn = gr.Button("🔄 Refresh List")
                            delete_btn = gr.Button("🗑️ Delete Document", variant="stop")
                        delete_status = gr.Markdown("")

                    with gr.Column():
                        gr.Markdown("#### Clear All Documents")
                        gr.Markdown("⚠️ **Warning:** This will delete your entire knowledge base!")
                        clear_confirm = gr.Textbox(
                            label="Type 'DELETE ALL' to confirm",
                            placeholder="DELETE ALL"
                        )
                        clear_all_btn = gr.Button("🗑️ Clear All Documents", variant="stop")
                        clear_status = gr.Markdown("")

                def confirm_and_clear(confirm_text):
                    if confirm_text.strip() == "DELETE ALL":
                        return app.clear_all_documents()
                    else:
                        return "⚠️ Please type 'DELETE ALL' to confirm"

                delete_btn.click(
                    fn=app.delete_document,
                    inputs=[delete_doc_selector],
                    outputs=[delete_status, delete_doc_selector]
                )

                refresh_delete_btn.click(
                    fn=app.get_document_list,
                    outputs=[delete_doc_selector]
                )

                clear_all_btn.click(
                    fn=confirm_and_clear,
                    inputs=[clear_confirm],
                    outputs=[clear_status]
                )

            # Tab 7: Statistics
            with gr.Tab("📊 Statistics"):
                gr.Markdown("### Knowledge Base Overview")

                stats_output = gr.Markdown()
                stats_btn = gr.Button("🔄 Refresh Statistics", variant="primary")

                stats_btn.click(
                    fn=app.get_statistics,
                    outputs=[stats_output]
                )

        # Auto-load stats on app start
        interface.load(
            fn=app.get_statistics,
            outputs=[stats_output]
        )

        # Footer
        gr.HTML("""
            <div style="text-align: center; margin-top: 30px; padding: 20px; color: #666;">
                <p>🔒 All processing happens locally on your machine • Your data never leaves your computer</p>
                <p style="font-size: 12px;">Powered by Ollama, ChromaDB, and Sentence Transformers</p>
            </div>
        """)

    return interface


if __name__ == "__main__":
    logger.info("Starting KnowledgeHub...")

    # Create and launch interface
    interface = create_ui()
    interface.launch(
        server_name="127.0.0.1",
        server_port=7860,
        share=False,
        inbrowser=True
    )
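
# To try this locally (assumed setup for the exercise, not part of the file's
# own logic): make sure Ollama is serving and the model is available, then
# start the app:
#
#   ollama serve          # start the local Ollama server
#   ollama pull llama3.2  # fetch the model used by OllamaClient above
#   python app.py         # launches Gradio at http://127.0.0.1:7860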