Implement GMAIL RAG assistant
This commit is contained in:
223
week5/community-contributions/emmy/gmail_rag/app.py
Normal file
223
week5/community-contributions/emmy/gmail_rag/app.py
Normal file
@@ -0,0 +1,223 @@
|
||||
import os
|
||||
import gradio as gr
|
||||
from dotenv import load_dotenv
|
||||
from langchain_community.vectorstores import Chroma
|
||||
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ---- Settings ----
|
||||
CHROMA_DIR = "chroma"
|
||||
EMBED_MODEL = "text-embedding-3-small"
|
||||
LLM_MODEL = "gpt-4o-mini"
|
||||
|
||||
# Initialize components
|
||||
embeddings = OpenAIEmbeddings(model=EMBED_MODEL)
|
||||
vectorstore = None
|
||||
qa_chain = None
|
||||
|
||||
|
||||
def initialize_qa_chain():
|
||||
"""Initialize the QA chain with the vector store."""
|
||||
global vectorstore, qa_chain
|
||||
|
||||
if not os.path.exists(CHROMA_DIR):
|
||||
return "❌ ChromaDB not found. Please run ingest_gmail_drive.py first to index your emails."
|
||||
|
||||
try:
|
||||
vectorstore = Chroma(
|
||||
persist_directory=CHROMA_DIR,
|
||||
embedding_function=embeddings
|
||||
)
|
||||
|
||||
# Create custom prompt
|
||||
prompt_template = """Use the following pieces of context from Gmail emails to answer the question.
|
||||
If you don't know the answer based on the context, just say you don't have that information in the emails.
|
||||
|
||||
Context:
|
||||
{context}
|
||||
|
||||
Question: {question}
|
||||
|
||||
Answer:"""
|
||||
|
||||
PROMPT = PromptTemplate(
|
||||
template=prompt_template,
|
||||
input_variables=["context", "question"]
|
||||
)
|
||||
|
||||
llm = ChatOpenAI(model=LLM_MODEL, temperature=0)
|
||||
|
||||
qa_chain = RetrievalQA.from_chain_type(
|
||||
llm=llm,
|
||||
chain_type="stuff",
|
||||
retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
|
||||
chain_type_kwargs={"prompt": PROMPT},
|
||||
return_source_documents=True
|
||||
)
|
||||
|
||||
return "✓ Ready to answer questions about your emails!"
|
||||
except Exception as e:
|
||||
return f"❌ Error initializing: {str(e)}"
|
||||
|
||||
|
||||
def query_emails(question, num_results=5):
|
||||
"""Query the email database."""
|
||||
if qa_chain is None:
|
||||
return "Please click 'Initialize System' first!", ""
|
||||
|
||||
if not question.strip():
|
||||
return "Please enter a question.", ""
|
||||
|
||||
try:
|
||||
# Get answer
|
||||
result = qa_chain({"query": question})
|
||||
answer = result['result']
|
||||
|
||||
# Format sources
|
||||
sources_text = "\n\n---\n\n**📧 Source Emails:**\n\n"
|
||||
for i, doc in enumerate(result['source_documents'][:num_results], 1):
|
||||
sources_text += f"**Email {i}:**\n"
|
||||
sources_text += f"- **Subject:** {doc.metadata.get('subject', 'N/A')}\n"
|
||||
sources_text += f"- **From:** {doc.metadata.get('from', 'N/A')}\n"
|
||||
sources_text += f"- **Date:** {doc.metadata.get('date', 'N/A')}\n"
|
||||
sources_text += f"- **Preview:** {doc.page_content[:200]}...\n\n"
|
||||
|
||||
return answer, sources_text
|
||||
except Exception as e:
|
||||
return f"❌ Error: {str(e)}", ""
|
||||
|
||||
|
||||
def search_emails(query_text, num_results=5):
|
||||
"""Direct vector similarity search."""
|
||||
if vectorstore is None:
|
||||
return "Please click 'Initialize System' first!"
|
||||
|
||||
if not query_text.strip():
|
||||
return "Please enter a search query."
|
||||
|
||||
try:
|
||||
docs = vectorstore.similarity_search(query_text, k=num_results)
|
||||
|
||||
results_text = f"**Found {len(docs)} relevant emails:**\n\n"
|
||||
for i, doc in enumerate(docs, 1):
|
||||
results_text += f"**Email {i}:**\n"
|
||||
results_text += f"- **Subject:** {doc.metadata.get('subject', 'N/A')}\n"
|
||||
results_text += f"- **From:** {doc.metadata.get('from', 'N/A')}\n"
|
||||
results_text += f"- **Date:** {doc.metadata.get('date', 'N/A')}\n"
|
||||
results_text += f"- **Content Preview:**\n{doc.page_content[:300]}...\n\n"
|
||||
results_text += "---\n\n"
|
||||
|
||||
return results_text
|
||||
except Exception as e:
|
||||
return f"❌ Error: {str(e)}"
|
||||
|
||||
|
||||
# Create Gradio Interface
|
||||
with gr.Blocks(title="Gmail RAG Assistant", theme=gr.themes.Soft()) as demo:
|
||||
gr.Markdown(
|
||||
"""
|
||||
# 📧 Gmail RAG Assistant
|
||||
Ask questions about your emails or search for specific content.
|
||||
"""
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
init_btn = gr.Button("🚀 Initialize System", variant="primary")
|
||||
status_text = gr.Textbox(label="Status", interactive=False)
|
||||
|
||||
init_btn.click(fn=initialize_qa_chain, outputs=status_text)
|
||||
|
||||
gr.Markdown("---")
|
||||
|
||||
with gr.Tab("💬 Ask Questions"):
|
||||
gr.Markdown("Ask natural language questions about your emails.")
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
question_input = gr.Textbox(
|
||||
label="Your Question",
|
||||
placeholder="What is the latest message from Andela?",
|
||||
lines=2
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
qa_num_results = gr.Slider(
|
||||
minimum=1,
|
||||
maximum=10,
|
||||
value=5,
|
||||
step=1,
|
||||
label="Sources to Show"
|
||||
)
|
||||
|
||||
qa_btn = gr.Button("Ask Question", variant="primary")
|
||||
|
||||
answer_output = gr.Markdown(label="Answer")
|
||||
sources_output = gr.Markdown(label="Sources")
|
||||
|
||||
qa_btn.click(
|
||||
fn=query_emails,
|
||||
inputs=[question_input, qa_num_results],
|
||||
outputs=[answer_output, sources_output]
|
||||
)
|
||||
|
||||
# Example questions
|
||||
gr.Examples(
|
||||
examples=[
|
||||
["What is the latest message from Andela?"],
|
||||
["Summarize emails about project updates"],
|
||||
["What meetings do I have scheduled?"],
|
||||
["Find emails about invoices or payments"],
|
||||
],
|
||||
inputs=question_input
|
||||
)
|
||||
|
||||
with gr.Tab("🔍 Search Emails"):
|
||||
gr.Markdown("Search for emails using semantic similarity.")
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
search_input = gr.Textbox(
|
||||
label="Search Query",
|
||||
placeholder="project deadline",
|
||||
lines=2
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
search_num_results = gr.Slider(
|
||||
minimum=1,
|
||||
maximum=10,
|
||||
value=5,
|
||||
step=1,
|
||||
label="Results"
|
||||
)
|
||||
|
||||
search_btn = gr.Button("Search", variant="primary")
|
||||
search_output = gr.Markdown(label="Search Results")
|
||||
|
||||
search_btn.click(
|
||||
fn=search_emails,
|
||||
inputs=[search_input, search_num_results],
|
||||
outputs=search_output
|
||||
)
|
||||
|
||||
gr.Examples(
|
||||
examples=[
|
||||
["Andela"],
|
||||
["meeting schedule"],
|
||||
["invoice payment"],
|
||||
["project status update"],
|
||||
],
|
||||
inputs=search_input
|
||||
)
|
||||
|
||||
gr.Markdown(
|
||||
"""
|
||||
---
|
||||
**Note:** Make sure you've run `ingest_gmail_drive.py` first to index your emails.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo.launch()
|
||||
Reference in New Issue
Block a user