sach91 bootcamp week8 exercise

This commit is contained in:
sach91
2025-10-30 15:42:04 +05:30
parent 3fa7a3dad5
commit ef48ed539d
20 changed files with 3124 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
"""
Data models package: re-exports the knowledge-graph and document model classes.
"""
from .knowledge_graph import KnowledgeGraph
from .document import Document, DocumentChunk, SearchResult, Summary
# Public names of this package; each entry matches one of the classes
# imported above from .knowledge_graph and .document.
__all__ = [
'KnowledgeGraph',
'Document',
'DocumentChunk',
'SearchResult',
'Summary'
]

View File

@@ -0,0 +1,82 @@
"""
Document data models
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from datetime import datetime
@dataclass
class DocumentChunk:
    """A single piece of a document's text together with its index."""

    id: str
    document_id: str
    content: str
    chunk_index: int
    metadata: Dict = field(default_factory=dict)

    def __str__(self):
        # Show at most the first 100 characters; longer content is cut and
        # marked with a trailing "...".
        text = self.content
        if len(text) > 100:
            text = text[:100] + "..."
        return f"Chunk {self.chunk_index}: {text}"
@dataclass
class Document:
    """A full document: its text, its chunks and free-form metadata."""

    id: str
    filename: str
    filepath: str
    content: str
    chunks: List[DocumentChunk]
    metadata: Dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)

    @property
    def num_chunks(self) -> int:
        """Number of entries in ``chunks``."""
        chunk_count = len(self.chunks)
        return chunk_count

    @property
    def total_chars(self) -> int:
        """Length of ``content`` in characters."""
        char_count = len(self.content)
        return char_count

    @property
    def extension(self) -> str:
        """The ``'extension'`` metadata entry, or ``''`` when it is missing."""
        return self.metadata.get('extension', '')

    def __str__(self):
        stats = f"{self.num_chunks} chunks, {self.total_chars} chars"
        return f"Document: {self.filename} ({stats})"

    def to_dict(self) -> Dict:
        """Return a dictionary view of the document for storage.

        ``content`` is cut to its first 500 characters (followed by
        ``'...'``) when it is longer than that; ``metadata`` is passed
        through by reference, not copied.
        """
        text = self.content
        if len(text) > 500:
            stored_text = text[:500] + '...'
        else:
            stored_text = text
        created = self.created_at.isoformat()
        return {
            'id': self.id,
            'filename': self.filename,
            'filepath': self.filepath,
            'content': stored_text,
            'num_chunks': self.num_chunks,
            'total_chars': self.total_chars,
            'extension': self.extension,
            'created_at': created,
            'metadata': self.metadata,
        }
@dataclass
class SearchResult:
    """One hit returned by a vector-database search."""

    chunk: DocumentChunk
    score: float
    document_id: str
    document_name: str

    def __str__(self):
        # Score is rendered with two decimal places.
        shown_score = format(self.score, ".2f")
        return f"{self.document_name} (score: {shown_score})"
@dataclass
class Summary:
    """A generated summary of one document.

    Attributes are the id and name of the summarised document, the summary
    text itself, an optional list of key points, and the creation time
    (defaults to ``datetime.now()`` when the instance is built).
    """

    document_id: str
    document_name: str
    summary_text: str
    key_points: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)

    def __str__(self):
        """Return a one-line preview holding at most 100 characters of the summary."""
        preview = self.summary_text
        # Only mark the text as truncated when it actually was; a short
        # summary is shown in full without a misleading trailing "...".
        if len(preview) > 100:
            preview = preview[:100] + "..."
        return f"Summary of {self.document_name}: {preview}"

View File

@@ -0,0 +1,110 @@
"""
Knowledge Graph data models
"""
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict, Set
@dataclass
class KnowledgeNode:
    """A node of the knowledge graph (a concept, entity, topic or document)."""

    id: str
    name: str
    # One of: 'document', 'concept', 'entity', 'topic'
    node_type: str
    description: str = ""
    metadata: Dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)

    def __str__(self):
        # The type is shown capitalised in front of the node name.
        kind_label = self.node_type.capitalize()
        return f"{kind_label}: {self.name}"
@dataclass
class KnowledgeEdge:
    """A weighted, labelled link from one node id to another."""

    source_id: str
    target_id: str
    # One of: 'related_to', 'cites', 'contains', 'similar_to'
    relationship: str
    weight: float = 1.0
    metadata: Dict = field(default_factory=dict)

    def __str__(self):
        arrow = f"--[{self.relationship}]-->"
        return f"{self.source_id} {arrow} {self.target_id}"
@dataclass
class KnowledgeGraph:
    """In-memory knowledge graph: nodes keyed by id plus a flat list of edges.

    Edges carry a source and a target id, but the traversal helpers
    (``get_neighbors``, ``get_related_documents``) and ``to_networkx``
    follow them in both directions.
    """

    nodes: Dict[str, KnowledgeNode] = field(default_factory=dict)
    edges: List[KnowledgeEdge] = field(default_factory=list)

    def add_node(self, node: KnowledgeNode) -> None:
        """Store *node* under its id, replacing any node already using that id."""
        self.nodes[node.id] = node

    def add_edge(self, edge: KnowledgeEdge) -> None:
        """Append *edge* when both of its endpoints are already in the graph.

        An edge that references an unknown node id is ignored without
        raising, so nodes must be added before the edges that connect them.
        """
        if edge.source_id in self.nodes and edge.target_id in self.nodes:
            self.edges.append(edge)

    def get_neighbors(self, node_id: str) -> List[str]:
        """Return the ids of all nodes sharing an edge with *node_id*.

        Direction is ignored and each neighbour appears once; the order of
        the returned list is unspecified.
        """
        neighbors = set()
        for edge in self.edges:
            if edge.source_id == node_id:
                neighbors.add(edge.target_id)
            elif edge.target_id == node_id:
                neighbors.add(edge.source_id)
        return list(neighbors)

    def get_related_documents(self, node_id: str, max_depth: int = 2) -> Set[str]:
        """Return ids of 'document' nodes within *max_depth* hops of *node_id*.

        The search is breadth-first over undirected edges. The start node
        itself is included when it is a document node. A negative
        *max_depth* yields an empty set.
        """
        related = set()
        if max_depth < 0:
            return related

        # Mark nodes as visited when they are enqueued so each node enters
        # the queue at most once; deque gives O(1) pops from the left.
        visited = {node_id}
        queue = deque([(node_id, 0)])
        while queue:
            current_id, depth = queue.popleft()

            node = self.nodes.get(current_id)
            if node is not None and node.node_type == 'document':
                related.add(current_id)

            # Only expand further while still inside the hop budget.
            if depth < max_depth:
                for neighbor_id in self.get_neighbors(current_id):
                    if neighbor_id not in visited:
                        visited.add(neighbor_id)
                        queue.append((neighbor_id, depth + 1))
        return related

    def to_networkx(self):
        """Build an undirected ``networkx.Graph`` mirroring this graph.

        Returns ``None`` when networkx is not installed. Node attributes are
        ``name``, ``type`` and ``description``; edge attributes are
        ``relationship`` and ``weight``.
        """
        # networkx is an optional dependency: guard only the import so that
        # unrelated errors during graph construction are not swallowed.
        try:
            import networkx as nx
        except ImportError:
            return None

        graph = nx.Graph()
        for node_id, node in self.nodes.items():
            graph.add_node(
                node_id,
                name=node.name,
                type=node.node_type,
                description=node.description,
            )
        for edge in self.edges:
            graph.add_edge(
                edge.source_id,
                edge.target_id,
                relationship=edge.relationship,
                weight=edge.weight,
            )
        return graph

    def __str__(self):
        return f"KnowledgeGraph: {len(self.nodes)} nodes, {len(self.edges)} edges"