sach91 bootcamp week8 exercise
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Document data models
|
||||
"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
|
||||
@dataclass
|
||||
class DocumentChunk:
|
||||
"""Represents a chunk of a document"""
|
||||
id: str
|
||||
document_id: str
|
||||
content: str
|
||||
chunk_index: int
|
||||
metadata: Dict = field(default_factory=dict)
|
||||
|
||||
def __str__(self):
|
||||
preview = self.content[:100] + "..." if len(self.content) > 100 else self.content
|
||||
return f"Chunk {self.chunk_index}: {preview}"
|
||||
|
||||
@dataclass
|
||||
class Document:
|
||||
"""Represents a complete document"""
|
||||
id: str
|
||||
filename: str
|
||||
filepath: str
|
||||
content: str
|
||||
chunks: List[DocumentChunk]
|
||||
metadata: Dict = field(default_factory=dict)
|
||||
created_at: datetime = field(default_factory=datetime.now)
|
||||
|
||||
@property
|
||||
def num_chunks(self) -> int:
|
||||
return len(self.chunks)
|
||||
|
||||
@property
|
||||
def total_chars(self) -> int:
|
||||
return len(self.content)
|
||||
|
||||
@property
|
||||
def extension(self) -> str:
|
||||
return self.metadata.get('extension', '')
|
||||
|
||||
def __str__(self):
|
||||
return f"Document: {self.filename} ({self.num_chunks} chunks, {self.total_chars} chars)"
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
"""Convert to dictionary for storage"""
|
||||
return {
|
||||
'id': self.id,
|
||||
'filename': self.filename,
|
||||
'filepath': self.filepath,
|
||||
'content': self.content[:500] + '...' if len(self.content) > 500 else self.content,
|
||||
'num_chunks': self.num_chunks,
|
||||
'total_chars': self.total_chars,
|
||||
'extension': self.extension,
|
||||
'created_at': self.created_at.isoformat(),
|
||||
'metadata': self.metadata
|
||||
}
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
"""Represents a search result from the vector database"""
|
||||
chunk: DocumentChunk
|
||||
score: float
|
||||
document_id: str
|
||||
document_name: str
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.document_name} (score: {self.score:.2f})"
|
||||
|
||||
@dataclass
|
||||
class Summary:
|
||||
"""Represents a document summary"""
|
||||
document_id: str
|
||||
document_name: str
|
||||
summary_text: str
|
||||
key_points: List[str] = field(default_factory=list)
|
||||
created_at: datetime = field(default_factory=datetime.now)
|
||||
|
||||
def __str__(self):
|
||||
return f"Summary of {self.document_name}: {self.summary_text[:100]}..."
|
||||
Reference in New Issue
Block a user