Add DevOps AI Assistant with Gradio interface and knowledge base functionality
196  week5/community-contributions/salah/devops-ai-assistance/app.py  Normal file
207  week5/community-contributions/salah/devops-ai-assistance/devops_ai_assistance.py  Normal file

week5/community-contributions/salah/devops-ai-assistance/app.py
@@ -0,0 +1,196 @@
import gradio as gr

from devops_ai_assistance import create_assistant


assistant = None
status_info = None


def initialize_assistant(kb_path: str):
    """Initialize the assistant with the knowledge base."""
    global assistant, status_info

    try:
        kb_path = kb_path.strip()
        if not kb_path:
            return "Error: Please provide a valid knowledge base path"

        print(f"\n🚀 Initializing with knowledge base: {kb_path}")
        assistant = create_assistant(kb_path)
        status_info = assistant.get_status()

        status_message = f"""
✅ **DevOps AI Assistant Initialized Successfully!**

📊 **Knowledge Base Statistics:**
- Documents Loaded: {status_info['documents_loaded']}
- Chunks Created: {status_info['chunks_created']}
- Vectors in Store: {status_info['vectors_in_store']}
- Knowledge Base Path: {status_info['knowledge_base_path']}

🎯 **Ready to Answer Questions About:**
- Kubernetes infrastructure configuration
- ArgoCD deployment manifests
- Helm charts and values
- Infrastructure as Code (IaC)
- DevOps best practices in your environment

Start by asking questions about your k8s cluster infrastructure!
"""
        return status_message

    except Exception as e:
        error_msg = f"Error initializing assistant: {str(e)}"
        print(f"❌ {error_msg}")
        return f"❌ {error_msg}"


def chat_with_assistant(message: str, history):
    """Chat function for the assistant."""
    global assistant

    if not assistant:
        bot_response = "❌ Assistant not initialized. Please provide a knowledge base path first."
        history.append((message, bot_response))
        return history, ""

    if not message.strip():
        bot_response = "Please enter a question about your DevOps infrastructure."
        history.append((message, bot_response))
        return history, ""

    try:
        result = assistant.ask(message)
        answer = result.get('answer', '')

        # Append a formatted source list when the assistant returns citations.
        sources_text = ""
        if result.get('sources'):
            sources_text = "\n\n📚 **Sources:**\n"
            for i, source in enumerate(result['sources'], 1):
                source_file = source.get('source', 'Unknown')
                file_type = source.get('file_type', 'Unknown')
                sources_text += f"\n{i}. **{source_file}** ({file_type})"

        bot_response = (answer + sources_text) if sources_text else answer

    except Exception as e:
        bot_response = f"Error processing question: {str(e)}"

    history.append((message, bot_response))
    return history, ""
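
# Note: `history` above uses Gradio's legacy tuple format, i.e. a list of
# (user_message, bot_message) pairs — e.g. [("How is nginx deployed?", "nginx is
# deployed via ...")] (illustrative strings). Newer Gradio releases prefer
# gr.Chatbot(type="messages"); the tuple form is assumed throughout this file.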

def create_interface():
    """Create the Gradio interface."""
    with gr.Blocks(title="DevOps AI Assistant") as interface:

        gr.Markdown("# 🤖 DevOps AI Assistant")
        gr.Markdown("Intelligent Q&A system for your Kubernetes infrastructure, powered by RAG and an LLM")

        gr.Markdown("## 🔧 Configuration")
        gr.Markdown("Enter the path to your GitOps repository (knowledge base) to initialize the assistant")

        with gr.Row():
            kb_path_input = gr.Textbox(
                label="Knowledge Base Path",
                placeholder="/workspace/aau/repositories/infra-gitops/",
                lines=1,
                value="/workspace/aau/repositories/infra-gitops/"
            )
            init_button = gr.Button("🚀 Initialize Assistant")

        status_output = gr.Markdown(value="⏳ Waiting for initialization...")

        gr.Markdown("## 💬 Chat Interface")

        chatbot = gr.Chatbot(
            label="Conversation",
            height=500,
            show_copy_button=True,
            avatar_images=("👤", "🤖"),
            bubble_full_width=False
        )

        with gr.Row():
            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask about your k8s infrastructure, ArgoCD, Helm charts, etc...",
                lines=2,
                scale=5
            )
            send_button = gr.Button("Send 💬", scale=1)

        with gr.Row():
            clear_button = gr.Button("🗑️ Clear Chat", scale=2)

        with gr.Accordion("📋 Example Questions", open=False):
            gr.Markdown("""
**Infrastructure & Deployment:**
- How is the Kubernetes cluster configured?
- What ArgoCD applications are deployed?
- Show me the Helm chart values for nginx
- What storage solutions are available?

**Monitoring & Observability:**
- How is Prometheus configured?
- What monitoring exporters are installed?
- Tell me about the metrics server setup

**Security & Access:**
- How are RBAC policies configured?
- What authentication methods are used?
- Explain the network policies

**DevOps Practices:**
- What is the deployment pipeline?
- How are secrets managed?
- Show me the backup strategy
""")

        init_button.click(
            initialize_assistant,
            inputs=[kb_path_input],
            outputs=[status_output]
        )

        msg_input.submit(
            chat_with_assistant,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input]
        )

        send_button.click(
            chat_with_assistant,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input]
        )

        clear_button.click(lambda: [], outputs=chatbot)

    return interface


def main():
    """Main entry point."""
    print("\n" + "=" * 60)
    print("🚀 DevOps AI Assistant - RAG System")
    print("=" * 60)
    print("Starting Gradio server...")
    print("\nAccess the application at: http://127.0.0.1:7860")
    print("=" * 60 + "\n")

    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        show_api=False
    )


if __name__ == "__main__":
    main()
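
# Usage sketch (a minimal example; it assumes OPENAI_API_KEY is exported and
# that the knowledge-base path entered in the UI exists locally — the default
# value above is only a placeholder):
#
#   $ export OPENAI_API_KEY=sk-...        # hypothetical key
#   $ python app.py
#   # then open http://127.0.0.1:7860, click "🚀 Initialize Assistant", and chat.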
week5/community-contributions/salah/devops-ai-assistance/devops_ai_assistance.py
@@ -0,0 +1,207 @@
import os
from pathlib import Path
from typing import List
import tempfile
import shutil

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain


class DevOpsKnowledgeBase:
    def __init__(self, knowledge_base_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
        self.knowledge_base_path = Path(knowledge_base_path)
        self.embedding_model_name = embedding_model
        self.embedding_model = None
        self.vectorstore = None
        self.documents = []
        self.chunks = []
        self.temp_db_dir = None

    def load_documents(self) -> List[Document]:
        """Recursively load YAML/Markdown/text/JSON files as LangChain documents."""
        self.documents = []

        if not self.knowledge_base_path.exists():
            raise ValueError(f"Knowledge base path does not exist: {self.knowledge_base_path}")

        supported_extensions = {'.yaml', '.yml', '.md', '.txt', '.json'}

        print(f"Loading documents from {self.knowledge_base_path}...")

        for file_path in self.knowledge_base_path.rglob("*"):
            if file_path.is_file() and file_path.suffix.lower() in supported_extensions:
                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read().strip()

                    # Skip empty or near-empty files (50 characters or fewer).
                    if content and len(content) > 50:
                        relative_path = file_path.relative_to(self.knowledge_base_path)
                        doc = Document(
                            page_content=content,
                            metadata={
                                "source": str(relative_path),
                                "file_type": file_path.suffix.lower(),
                                "path": str(file_path)
                            }
                        )
                        self.documents.append(doc)

                except Exception as e:
                    print(f"Skipped {file_path.name}: {str(e)}")

        print(f"Loaded {len(self.documents)} documents")
        return self.documents

    def chunk_documents(self, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
        """Split the loaded documents into overlapping chunks for retrieval."""
        if not self.documents:
            raise ValueError("No documents loaded. Call load_documents() first.")

        print(f"Splitting {len(self.documents)} documents into chunks...")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", " ", ""]
        )

        self.chunks = text_splitter.split_documents(self.documents)
        print(f"Created {len(self.chunks)} chunks")
        return self.chunks
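
    # Illustration (not part of the commit): with the defaults above, a
    # 2,500-character manifest yields roughly three or four chunks of at most
    # 1,000 characters, with adjacent chunks sharing about 200 characters of
    # overlap so that YAML resources split mid-block remain retrievable.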

    def initialize_embedding_model(self):
        print(f"Initializing embedding model: {self.embedding_model_name}...")
        self.embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_name)
        print("Embedding model initialized")

    def create_vectorstore(self) -> Chroma:
        if not self.chunks:
            raise ValueError("No chunks available. Call chunk_documents() first.")

        if not self.embedding_model:
            raise ValueError("Embedding model not initialized. Call initialize_embedding_model() first.")

        print("Creating vector store...")

        # Drop any previous temporary database before building a fresh one.
        if self.temp_db_dir:
            try:
                shutil.rmtree(self.temp_db_dir)
            except OSError:
                pass

        self.temp_db_dir = tempfile.mkdtemp(prefix="devops_kb_")

        self.vectorstore = Chroma.from_documents(
            documents=self.chunks,
            embedding=self.embedding_model,
            persist_directory=self.temp_db_dir
        )

        # Note: _collection is a private Chroma attribute, used here only to count vectors.
        doc_count = self.vectorstore._collection.count()
        print(f"Vector store created with {doc_count} documents")
        return self.vectorstore

    def initialize(self):
        print("Initializing DevOps Knowledge Base...")
        print("=" * 60)

        self.load_documents()
        self.chunk_documents()
        self.initialize_embedding_model()
        self.create_vectorstore()

        print("\nKnowledge base initialized successfully!")
        return self.vectorstore


class DevOpsAIAssistant:
    def __init__(self, knowledge_base_path: str, embedding_model: str = "all-MiniLM-L6-v2"):
        self.knowledge_base = DevOpsKnowledgeBase(knowledge_base_path, embedding_model)
        self.vectorstore = None
        self.conversation_chain = None
        self.memory = None
        self.llm = None

    def setup(self):
        print("Setting up DevOps AI Assistant...")

        self.vectorstore = self.knowledge_base.initialize()

        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set")

        print("Initializing OpenAI LLM...")
        self.llm = ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0.3,
            api_key=api_key
        )

        print("Setting up conversation memory...")
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key='answer'
        )

        print("Creating conversation chain...")
        retriever = self.vectorstore.as_retriever(search_kwargs={"k": 5})

        self.conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=retriever,
            memory=self.memory,
            return_source_documents=True,
            verbose=False
        )

        print("DevOps AI Assistant ready!")
        return self

    def ask(self, question: str) -> dict:
        if not self.conversation_chain:
            raise ValueError("Assistant not initialized. Call setup() first.")

        result = self.conversation_chain.invoke({"question": question})

        response = {
            "answer": result.get('answer', ''),
            "sources": []
        }

        if result.get('source_documents'):
            for doc in result['source_documents']:
                response["sources"].append({
                    "content": doc.page_content[:300],
                    "source": doc.metadata.get('source', 'Unknown'),
                    "file_type": doc.metadata.get('file_type', 'Unknown')
                })

        return response
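
    # For reference, ask() returns a dict shaped like this (illustrative values only):
    #   {"answer": "Prometheus is deployed via ...",
    #    "sources": [{"content": "...", "source": "monitoring/prometheus/values.yaml",
    #                 "file_type": ".yaml"}]}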

    def get_status(self) -> dict:
        if not self.vectorstore:
            return {"status": "not_initialized"}

        doc_count = self.vectorstore._collection.count()

        return {
            "status": "ready",
            "documents_loaded": len(self.knowledge_base.documents),
            "chunks_created": len(self.knowledge_base.chunks),
            "vectors_in_store": doc_count,
            "knowledge_base_path": str(self.knowledge_base.knowledge_base_path)
        }
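
    # Example status payload once setup() has run (numbers are illustrative):
    #   {"status": "ready", "documents_loaded": 42, "chunks_created": 180,
    #    "vectors_in_store": 180, "knowledge_base_path": "/workspace/.../infra-gitops"}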


def create_assistant(knowledge_base_path: str) -> DevOpsAIAssistant:
    assistant = DevOpsAIAssistant(knowledge_base_path)
    assistant.setup()
    return assistant
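
# Programmatic usage sketch (a minimal example, not exercised by this commit;
# the path and question below are placeholders):
#
#   assistant = create_assistant("/workspace/aau/repositories/infra-gitops/")
#   result = assistant.ask("How is Prometheus configured?")
#   print(result["answer"])
#   for src in result["sources"]:
#       print(f"- {src['source']} ({src['file_type']})")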