Add DevOps AI Assistant with Gradio interface and knowledge base functionality

2025-10-30 13:31:15 +03:00
parent 4d4e2478e4
commit aa3ddf2538
2 changed files with 403 additions and 0 deletions
--- a/week5/community-contributions/salah/devops-ai-assistance/app.py
+++ b/week5/community-contributions/salah/devops-ai-assistance/app.py
@@ -0,0 +1,196 @@
 import os
 import gradio as gr
 from devops_ai_assistance import create_assistant, DevOpsAIAssistant
 # Shared assistant instance; stays None until initialize_assistant() stores the
 # object returned by create_assistant().
 assistant = None
 # Dictionary returned by assistant.get_status() during the most recent
 # initialization; None before that.
 status_info = None
 def initialize_assistant(kb_path: str):
    """Build the shared assistant from a knowledge base directory.

    The path is stripped first; a blank path returns an error string and
    leaves the module globals alone. Otherwise the assistant is created,
    its status is cached in ``status_info`` and a Markdown summary is
    returned. Any exception is printed and returned as a "❌ ..." message
    rather than propagated.
    """
    global assistant, status_info
    try:
        cleaned_path = kb_path.strip()
        if cleaned_path == "":
            return "Error: Please provide a valid knowledge base path"

        print(f"\n🚀 Initializing with knowledge base: {cleaned_path}")
        assistant = create_assistant(cleaned_path)
        status_info = assistant.get_status()

        # The summary is rendered by the Markdown status component.
        return f"""
 ✅ **DevOps AI Assistant Initialized Successfully!**
 📊 **Knowledge Base Statistics:**
 - Documents Loaded: {status_info['documents_loaded']}
 - Chunks Created: {status_info['chunks_created']}
 - Vectors in Store: {status_info['vectors_in_store']}
 - Knowledge Base Path: {status_info['knowledge_base_path']}
 🎯 **Ready to Answer Questions About:**
 - Kubernetes infrastructure configuration
 - ArgoCD deployment manifests
 - Helm charts and values
 - Infrastructure as Code (IaC)
 - DevOps best practices in your environment
 Start by asking questions about your k8s cluster infrastructure!
 """
    except Exception as e:
        failure = "Error initializing assistant: " + str(e)
        print(f"❌ {failure}")
        return f"❌ {failure}"
 def chat_with_assistant(message: str, history):
    """Answer one chat message and append the exchange to the history.

    Args:
        message: Text entered by the user. ``None`` or whitespace-only text
            is answered with a prompt to enter a question.
        history: List of ``(user_message, bot_response)`` tuples. ``None``
            is treated as an empty history so the append below cannot fail.

    Returns:
        ``(history, "")``: the history with the new exchange appended, and an
        empty string for the second output.
    """
    global assistant
    if history is None:
        history = []

    if not assistant:
        bot_response = "❌ Assistant not initialized. Please provide a knowledge base path first."
    elif not message or not message.strip():
        bot_response = "Please enter a question about your DevOps infrastructure."
    else:
        try:
            result = assistant.ask(message)
            bot_response = result.get('answer', '')
            sources = result.get('sources')
            if sources:
                # One numbered Markdown line per source returned by the assistant.
                listing = "".join(
                    f"\n{i}. **{source.get('source', 'Unknown')}** ({source.get('file_type', 'Unknown')})"
                    for i, source in enumerate(sources, 1)
                )
                bot_response += "\n\n📚 **Sources:**\n" + listing
        except Exception as e:
            # Show the failure in the chat instead of breaking the UI callback.
            bot_response = f"Error processing question: {str(e)}"

    history.append((message, bot_response))
    return history, ""
 def create_interface():
    """Create the Gradio interface.

    Builds a Blocks layout with a configuration row (knowledge base path and
    initialize button), a status panel, the chatbot, the question box with
    its send button, a clear button and a collapsed list of example
    questions, then wires the callbacks.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """

    def reset_conversation():
        """Empty the chat window and drop the assistant's stored turns."""
        # Clearing only the widget would leave earlier turns in the
        # assistant's memory, so later answers would still depend on a
        # conversation the user can no longer see.
        memory = getattr(assistant, "memory", None)
        if memory is not None:
            memory.clear()
        return []

    with gr.Blocks(title="DevOps AI Assistant") as interface:
        gr.Markdown("# 🤖 DevOps AI Assistant")
        gr.Markdown("Intelligent Q&A system for your Kubernetes infrastructure powered by RAG and LLM")
        gr.Markdown("## 🔧 Configuration")
        gr.Markdown("Enter the path to your GitOps repository (knowledge base) to initialize the assistant")
        with gr.Row():
            kb_path_input = gr.Textbox(
                label="Knowledge Base Path",
                placeholder="/workspace/aau/repositories/infra-gitops/",
                lines=1,
                value="/workspace/aau/repositories/infra-gitops/"
            )
            init_button = gr.Button("🚀 Initialize Assistant")
        status_output = gr.Markdown(value="⏳ Waiting for initialization...")
        gr.Markdown("## 💬 Chat Interface")
        # NOTE(review): Gradio documents avatar_images as image file paths or
        # URLs; confirm the installed version accepts emoji strings here.
        chatbot = gr.Chatbot(
            label="Conversation",
            height=500,
            show_copy_button=True,
            avatar_images=("👤", "🤖"),
            bubble_full_width=False
        )
        with gr.Row():
            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask about your k8s infrastructure, ArgoCD, Helm charts, etc...",
                lines=2,
                scale=5
            )
            send_button = gr.Button("Send 💬", scale=1)
        with gr.Row():
            clear_button = gr.Button("🗑️ Clear Chat", scale=2)
        with gr.Accordion("📋 Example Questions", open=False):
            gr.Markdown("""
 **Infrastructure & Deployment:**
 - How is the Kubernetes cluster configured?
 - What ArgoCD applications are deployed?
 - Show me the Helm chart values for nginx
 - What storage solutions are available?
 **Monitoring & Observability:**
 - How is Prometheus configured?
 - What monitoring exporters are installed?
 - Tell me about the metrics server setup
 **Security & Access:**
 - How are RBAC policies configured?
 - What authentication methods are used?
 - Explain the network policies
 **DevOps Practices:**
 - What is the deployment pipeline?
 - How are secrets managed?
 - Show me the backup strategy
            """)
        init_button.click(
            initialize_assistant,
            inputs=[kb_path_input],
            outputs=[status_output]
        )
        # Pressing Enter in the question box and clicking Send run the same
        # handler; its second output empties the question box.
        for trigger in (msg_input.submit, send_button.click):
            trigger(
                chat_with_assistant,
                inputs=[msg_input, chatbot],
                outputs=[chatbot, msg_input]
            )
        clear_button.click(reset_conversation, outputs=chatbot)
    return interface
 def main():
    """Print the startup banner, build the interface and launch the server."""
    rule = "=" * 60
    banner_lines = (
        "\n" + rule,
        "🚀 DevOps AI Assistant - RAG System",
        rule,
        "Starting Gradio server...",
        "\nAccess the application at: http://127.0.0.1:7860",
        rule + "\n",
    )
    for line in banner_lines:
        print(line)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "show_api": False,
    }
    app = create_interface()
    app.launch(**launch_options)
 # Run main() only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/week5/community-contributions/salah/devops-ai-assistance/devops_ai_assistance.py
+++ b/week5/community-contributions/salah/devops-ai-assistance/devops_ai_assistance.py
@@ -0,0 +1,207 @@
 import os
 from pathlib import Path
 from typing import List, Optional
 import json
 import tempfile
 import shutil
 from langchain_core.documents import Document
 from langchain_community.document_loaders import DirectoryLoader, TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_openai import ChatOpenAI
 from langchain_classic.memory import ConversationBufferMemory
 from langchain_classic.chains import ConversationalRetrievalChain
 class DevOpsKnowledgeBase:
    """Load text files from a directory, chunk them and index them in Chroma.

    The pipeline runs in four steps, each also callable on its own:
    ``load_documents`` -> ``chunk_documents`` -> ``initialize_embedding_model``
    -> ``create_vectorstore``. ``initialize`` runs them in that order.
    """

    def __init__(self, knowledge_base_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
        """Store the configuration; nothing is read or loaded here.

        Args:
            knowledge_base_path: Directory that is searched recursively for files.
            embedding_model: Model name handed to ``HuggingFaceEmbeddings``.
        """
        self.knowledge_base_path = Path(knowledge_base_path)
        self.embedding_model_name = embedding_model
        self.embedding_model = None
        self.vectorstore = None
        self.documents = []
        self.chunks = []
        # Directory created by create_vectorstore() to back the Chroma store.
        self.temp_db_dir = None

    def load_documents(self) -> List[Document]:
        """Read every supported file under the knowledge base path.

        Files are kept when their suffix is one of .yaml/.yml/.md/.txt/.json
        and their stripped content is longer than 50 characters. A file that
        fails to load is reported on stdout and skipped.

        Returns:
            The loaded documents (also stored in ``self.documents``).

        Raises:
            ValueError: If the knowledge base path does not exist.
        """
        self.documents = []
        if not self.knowledge_base_path.exists():
            raise ValueError(f"Knowledge base path does not exist: {self.knowledge_base_path}")
        supported_extensions = {'.yaml', '.yml', '.md', '.txt', '.json'}
        print(f"Loading documents from {self.knowledge_base_path}...")
        for file_path in self.knowledge_base_path.rglob("*"):
            if not file_path.is_file() or file_path.suffix.lower() not in supported_extensions:
                continue
            try:
                # errors='ignore' drops undecodable bytes instead of failing the file.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read().strip()
                if len(content) > 50:
                    relative_path = file_path.relative_to(self.knowledge_base_path)
                    self.documents.append(
                        Document(
                            page_content=content,
                            metadata={
                                "source": str(relative_path),
                                "file_type": file_path.suffix.lower(),
                                "path": str(file_path)
                            }
                        )
                    )
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(f"Skipped {file_path.name}: {str(e)}")
        print(f"Loaded {len(self.documents)} documents")
        return self.documents

    def chunk_documents(self, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
        """Split the loaded documents into overlapping chunks.

        Args:
            chunk_size: Passed to ``RecursiveCharacterTextSplitter``.
            chunk_overlap: Passed to ``RecursiveCharacterTextSplitter``.

        Returns:
            The chunks (also stored in ``self.chunks``).

        Raises:
            ValueError: If ``self.documents`` is empty.
        """
        if not self.documents:
            raise ValueError("No documents loaded. Call load_documents() first.")
        print(f"Splitting {len(self.documents)} documents into chunks...")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", " ", ""]
        )
        self.chunks = text_splitter.split_documents(self.documents)
        print(f"Created {len(self.chunks)} chunks")
        return self.chunks

    def initialize_embedding_model(self):
        """Create the ``HuggingFaceEmbeddings`` instance for the configured model name."""
        print(f"Initializing embedding model: {self.embedding_model_name}...")
        self.embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_name)
        print("Embedding model initialized")

    def create_vectorstore(self) -> Chroma:
        """Embed the chunks into a Chroma store persisted in a fresh temp directory.

        Returns:
            The new vector store (also stored in ``self.vectorstore``).

        Raises:
            ValueError: If there are no chunks or no embedding model yet.
        """
        if not self.chunks:
            raise ValueError("No chunks available. Call chunk_documents() first.")
        if not self.embedding_model:
            raise ValueError("Embedding model not initialized. Call initialize_embedding_model() first.")
        print("Creating vector store...")
        if self.temp_db_dir:
            # Best-effort removal of the directory from a previous build.
            # ignore_errors keeps a failed cleanup from blocking the rebuild
            # without also swallowing KeyboardInterrupt/SystemExit the way a
            # bare ``except`` would.
            shutil.rmtree(self.temp_db_dir, ignore_errors=True)
        self.temp_db_dir = tempfile.mkdtemp(prefix="devops_kb_")
        self.vectorstore = Chroma.from_documents(
            documents=self.chunks,
            embedding=self.embedding_model,
            persist_directory=self.temp_db_dir
        )
        doc_count = self.vectorstore._collection.count()
        print(f"Vector store created with {doc_count} documents")
        return self.vectorstore

    def initialize(self):
        """Run load, chunk, embedding-model and vector-store steps in order.

        Returns:
            The vector store produced by ``create_vectorstore``.
        """
        print("Initializing DevOps Knowledge Base...")
        print("=" * 60)
        self.load_documents()
        self.chunk_documents()
        self.initialize_embedding_model()
        self.create_vectorstore()
        print("\nKnowledge base initialized successfully!")
        return self.vectorstore
 class DevOpsAIAssistant:
    """Conversational question answering over a ``DevOpsKnowledgeBase``.

    ``setup`` builds the vector store, the OpenAI chat model, the
    conversation memory and the retrieval chain; ``ask`` then answers
    questions and reports the source documents the chain returned.
    """

    def __init__(self, knowledge_base_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
        """Create the knowledge base wrapper; nothing is loaded until ``setup``."""
        self.knowledge_base = DevOpsKnowledgeBase(knowledge_base_path, embedding_model)
        self.vectorstore = None
        self.conversation_chain = None
        self.memory = None
        self.llm = None

    def setup(self):
        """Initialize the knowledge base, LLM, memory and retrieval chain.

        Returns:
            ``self``, so the call can be chained.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set, or if the knowledge
                base initialization raises it.
        """
        print("Setting up DevOps AI Assistant...")
        # Fail fast: check the key before the knowledge base is loaded,
        # embedded and indexed, so a missing key does not waste that work.
        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set")
        self.vectorstore = self.knowledge_base.initialize()
        print("Initializing OpenAI LLM...")
        self.llm = ChatOpenAI(
            model_name="gpt-4o-mini",
            temperature=0.3,
            api_key=api_key
        )
        print("Setting up conversation memory...")
        # output_key tells the memory which chain output to store, since the
        # chain also returns source documents.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key='answer'
        )
        print("Creating conversation chain...")
        retriever = self.vectorstore.as_retriever(search_kwargs={"k": 5})
        self.conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=retriever,
            memory=self.memory,
            return_source_documents=True,
            verbose=False
        )
        print("DevOps AI Assistant ready!")
        return self

    def ask(self, question: str) -> dict:
        """Run one question through the conversation chain.

        Returns:
            A dict with ``"answer"`` (string, ``''`` when the chain gave none)
            and ``"sources"``: one dict per returned source document holding
            the first 300 characters of its content plus its ``source`` and
            ``file_type`` metadata (``'Unknown'`` when missing).

        Raises:
            ValueError: If ``setup`` has not created the chain yet.
        """
        if not self.conversation_chain:
            raise ValueError("Assistant not initialized. Call setup() first.")
        result = self.conversation_chain.invoke({"question": question})
        return {
            "answer": result.get('answer', ''),
            "sources": [
                {
                    "content": doc.page_content[:300],
                    "source": doc.metadata.get('source', 'Unknown'),
                    "file_type": doc.metadata.get('file_type', 'Unknown')
                }
                for doc in (result.get('source_documents') or [])
            ]
        }

    def get_status(self) -> dict:
        """Report readiness and knowledge base statistics.

        Returns:
            ``{"status": "not_initialized"}`` while there is no vector store;
            otherwise the status plus document, chunk and vector counts and
            the knowledge base path.
        """
        if not self.vectorstore:
            return {"status": "not_initialized"}
        doc_count = self.vectorstore._collection.count()
        return {
            "status": "ready",
            "documents_loaded": len(self.knowledge_base.documents),
            "chunks_created": len(self.knowledge_base.chunks),
            "vectors_in_store": doc_count,
            "knowledge_base_path": str(self.knowledge_base.knowledge_base_path)
        }
 def create_assistant(knowledge_base_path: str) -> DevOpsAIAssistant:
    """Construct a DevOpsAIAssistant for the given path and run its setup."""
    # setup() returns the assistant itself, so the call can be chained.
    return DevOpsAIAssistant(knowledge_base_path).setup()