# RAG API
This section provides detailed API documentation for the RAG (Retrieval-Augmented Generation) components in Scoras.
## Document Management
### Document
class Document:
    """Text document, with optional metadata and ID, for RAG systems."""

    def __init__(
        self,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        id: Optional[str] = None
    ):
        """
        Initialize a Document for use in RAG systems.

        Args:
            content: Text content of the document.
            metadata: Optional metadata for the document.
            id: Optional unique identifier for the document.
        """

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the document to a dictionary.

        Returns:
            Dictionary representation of the document.
        """

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Document":
        """
        Create a document from a dictionary.

        Args:
            data: Dictionary containing document data.

        Returns:
            Document instance.
        """

    @classmethod
    def from_text_file(cls, file_path: str, metadata: Optional[Dict[str, Any]] = None) -> "Document":
        """
        Create a document from a text file.

        Args:
            file_path: Path to the text file.
            metadata: Optional metadata for the document.

        Returns:
            Document instance.
        """
### DocumentCollection
class DocumentCollection:
    """Named collection for managing multiple documents."""

    def __init__(
        self,
        documents: Optional[List[Document]] = None,
        name: Optional[str] = None
    ):
        """
        Initialize a DocumentCollection for managing multiple documents.

        Args:
            documents: Optional list of initial documents.
            name: Optional name for the collection.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a document to the collection.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the collection.

        Args:
            documents: List of documents to add.
        """

    def get_document(self, id: str) -> Optional[Document]:
        """
        Get a document by ID.

        Args:
            id: ID of the document to get.

        Returns:
            The document if found, None otherwise.
        """

    def remove_document(self, id: str) -> bool:
        """
        Remove a document by ID.

        Args:
            id: ID of the document to remove.

        Returns:
            True if the document was removed, False otherwise.
        """

    def clear(self) -> None:
        """Clear all documents from the collection."""

    def save(self, file_path: str) -> None:
        """
        Save the document collection to a file.

        Args:
            file_path: Path to save the collection to.
        """

    @classmethod
    def load(cls, file_path: str) -> "DocumentCollection":
        """
        Load a document collection from a file.

        Args:
            file_path: Path to load the collection from.

        Returns:
            DocumentCollection instance.
        """
## Retrievers
### BaseRetriever
class BaseRetriever:
    """Base retriever over a list of documents."""

    def __init__(
        self,
        documents: List[Document],
        enable_scoring: bool = True
    ):
        """
        Initialize a base retriever.

        Args:
            documents: List of documents to retrieve from.
            enable_scoring: Whether to track complexity scoring.
        """

    async def retrieve(
        self,
        query: str,
        top_k: int = 3
    ) -> List[Document]:
        """
        Retrieve documents based on a query asynchronously.

        Args:
            query: Query to retrieve documents for.
            top_k: Number of documents to retrieve.

        Returns:
            List of retrieved documents.
        """

    def retrieve_sync(
        self,
        query: str,
        top_k: int = 3
    ) -> List[Document]:
        """
        Retrieve documents based on a query synchronously.

        Args:
            query: Query to retrieve documents for.
            top_k: Number of documents to retrieve.

        Returns:
            List of retrieved documents.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a document to the retriever.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the retriever.

        Args:
            documents: List of documents to add.
        """

    def get_complexity_score(self) -> Dict[str, Any]:
        """
        Get the complexity score for the retriever.

        Returns:
            Dictionary containing complexity score information.
        """
### SemanticRetriever
class SemanticRetriever(BaseRetriever):
    """Retriever that uses embeddings for retrieval."""

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        enable_scoring: bool = True
    ):
        """
        Initialize a semantic retriever that uses embeddings for retrieval.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings.
            enable_scoring: Whether to track complexity scoring.
        """
### KeywordRetriever
class KeywordRetriever(BaseRetriever):
    """Retriever that uses keyword matching for retrieval."""

    def __init__(
        self,
        documents: List[Document],
        use_stemming: bool = True,
        enable_scoring: bool = True
    ):
        """
        Initialize a keyword retriever that uses keyword matching for retrieval.

        Args:
            documents: List of documents to retrieve from.
            use_stemming: Whether to use stemming for keyword matching.
            enable_scoring: Whether to track complexity scoring.
        """
### HybridRetriever
class HybridRetriever(BaseRetriever):
    """Retriever that combines semantic and keyword retrieval."""

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        semantic_weight: float = 0.7,
        enable_scoring: bool = True
    ):
        """
        Initialize a hybrid retriever that combines semantic and keyword retrieval.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings.
            semantic_weight: Weight to give to semantic retrieval (0.0-1.0).
            enable_scoring: Whether to track complexity scoring.
        """
### ContextualRetriever
class ContextualRetriever(BaseRetriever):
    """Retriever that adapts to conversation context."""

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        context_window_size: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a contextual retriever that adapts to conversation context.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings.
            context_window_size: Number of previous exchanges to consider
                as context.
            enable_scoring: Whether to track complexity scoring.
        """

    def add_context(self, message: str, role: str = "user") -> None:
        """
        Add a message to the conversation context.

        Args:
            message: Message to add.
            role: Role of the message sender (e.g., "user", "assistant").
        """

    def clear_context(self) -> None:
        """Clear the conversation context."""
## RAG Systems
### SimpleRAG
class SimpleRAG:
    """Simple RAG system pairing an agent with a document retriever."""

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        retriever_type: str = "semantic",
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a simple RAG system.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            retriever_type: Type of retriever to use
                ("semantic", "keyword", "hybrid").
            top_k: Number of documents to retrieve.
            enable_scoring: Whether to track complexity scoring.
        """

    async def run(self, query: str) -> str:
        """
        Run the RAG system asynchronously.

        Args:
            query: Query to process.

        Returns:
            Generated response.
        """

    def run_sync(self, query: str) -> str:
        """
        Run the RAG system synchronously.

        Args:
            query: Query to process.

        Returns:
            Generated response.
        """

    async def stream(self, query: str) -> AsyncIterator[str]:
        """
        Stream the RAG system's response asynchronously.

        Args:
            query: Query to process.

        Yields:
            Chunks of the generated response.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a document to the RAG system.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the RAG system.

        Args:
            documents: List of documents to add.
        """

    def get_complexity_score(self) -> Dict[str, Any]:
        """
        Get the complexity score for the RAG system.

        Returns:
            Dictionary containing complexity score information.
        """
### SemanticRAG
class SemanticRAG(SimpleRAG):
    """Semantic RAG system with advanced embedding features."""

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a semantic RAG system with advanced embedding features.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            embedding_model: Model to use for embeddings.
            top_k: Number of documents to retrieve.
            enable_scoring: Whether to track complexity scoring.
        """
### ContextualRAG
class ContextualRAG(SimpleRAG):
    """Contextual RAG system that adapts to conversation context."""

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        context_window_size: int = 3,
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a contextual RAG system that adapts to conversation context.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            embedding_model: Model to use for embeddings.
            context_window_size: Number of previous exchanges to consider
                as context.
            top_k: Number of documents to retrieve.
            enable_scoring: Whether to track complexity scoring.
        """
## RAG Factories
### create_rag_system
def create_rag_system(
    agent: Agent,
    documents: List[Document],
    rag_type: str = "simple",
    **kwargs
) -> Union[SimpleRAG, SemanticRAG, ContextualRAG]:
    """
    Factory function to create different types of RAG systems.

    Args:
        agent: Agent to use for generation.
        documents: List of documents for retrieval.
        rag_type: Type of RAG system to create
            ("simple", "semantic", "contextual").
        **kwargs: Additional arguments specific to the RAG type.

    Returns:
        Instantiated RAG system of the specified type.
    """