RAG API

This section provides detailed API documentation for the RAG (Retrieval-Augmented Generation) components in Scoras.

Document Management

Document

class Document:
    """
    A piece of text content, with optional metadata and an optional
    identifier, for use in RAG systems.

    Only the signatures are listed in this reference; method bodies are
    not shown.
    """

    def __init__(
        self,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        id: Optional[str] = None
    ):
        """
        Initialize a Document for use in RAG systems.

        Args:
            content: Text content of the document.
            metadata: Optional metadata for the document. Defaults to None.
            id: Optional unique identifier for the document. Defaults to
                None. NOTE(review): whether an identifier is generated when
                this is omitted is not shown here; confirm.
        """

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the document to a dictionary.

        Returns:
            Dictionary representation of the document.
            NOTE(review): the exact keys are not specified in this
            reference; presumably they mirror the constructor arguments
            (content, metadata, id). Confirm against the implementation.
        """

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Document":
        """
        Create a document from a dictionary (alternate constructor).

        Args:
            data: Dictionary containing document data. Presumably the same
                shape that to_dict() produces; confirm.

        Returns:
            Document instance.
        """

    @classmethod
    def from_text_file(cls, file_path: str, metadata: Optional[Dict[str, Any]] = None) -> "Document":
        """
        Create a document from a text file (alternate constructor).

        Args:
            file_path: Path to the text file.
            metadata: Optional metadata for the document. Defaults to None.

        Returns:
            Document instance.

        NOTE(review): the text encoding used to read the file and the
        behavior for a missing file are not specified here; confirm.
        """

DocumentCollection

class DocumentCollection:
    """
    A container for managing multiple Document objects, optionally named.

    Only the signatures are listed in this reference; method bodies are
    not shown.
    """

    def __init__(
        self,
        documents: Optional[List[Document]] = None,
        name: Optional[str] = None
    ):
        """
        Initialize a DocumentCollection for managing multiple documents.

        Args:
            documents: Optional list of initial documents. Defaults to None.
            name: Optional name for the collection. Defaults to None.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the collection.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the collection.

        Args:
            documents: List of documents to add.
        """

    def get_document(self, id: str) -> Optional[Document]:
        """
        Look up a document by its ID.

        Args:
            id: ID of the document to get.

        Returns:
            The Document if found, None otherwise.
        """

    def remove_document(self, id: str) -> bool:
        """
        Remove a document by its ID.

        Args:
            id: ID of the document to remove.

        Returns:
            True if a document was removed, False otherwise.
        """

    def clear(self) -> None:
        """Remove all documents from the collection."""

    def save(self, file_path: str) -> None:
        """
        Save the document collection to a file.

        Args:
            file_path: Path to save the collection to.

        NOTE(review): the on-disk format is not specified in this
        reference; confirm against the implementation.
        """

    @classmethod
    def load(cls, file_path: str) -> "DocumentCollection":
        """
        Load a document collection from a file (alternate constructor).

        Args:
            file_path: Path to load the collection from. Presumably a file
                previously written by save(); confirm.

        Returns:
            DocumentCollection instance.
        """

Retrievers

BaseRetriever

class BaseRetriever:
    """
    Base class for retrievers: selects documents for a query from a list
    of documents, with asynchronous and synchronous entry points.

    Only the signatures are listed in this reference; method bodies are
    not shown.
    """

    def __init__(
        self,
        documents: List[Document],
        enable_scoring: bool = True
    ):
        """
        Initialize a base retriever.

        Args:
            documents: List of documents to retrieve from.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

    async def retrieve(
        self,
        query: str,
        top_k: int = 3
    ) -> List[Document]:
        """
        Retrieve documents based on a query asynchronously.

        Declared with ``async def``, so the call must be awaited.

        Args:
            query: Query to retrieve documents for.
            top_k: Number of documents to retrieve. Defaults to 3.

        Returns:
            List of retrieved documents.
        """

    def retrieve_sync(
        self,
        query: str,
        top_k: int = 3
    ) -> List[Document]:
        """
        Retrieve documents based on a query synchronously.

        Takes the same arguments as retrieve() but is a regular
        (non-async) method.

        Args:
            query: Query to retrieve documents for.
            top_k: Number of documents to retrieve. Defaults to 3.

        Returns:
            List of retrieved documents.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the retriever.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the retriever.

        Args:
            documents: List of documents to add.
        """

    def get_complexity_score(self) -> Dict[str, Any]:
        """
        Get the complexity score for the retriever.

        Returns:
            Dictionary containing complexity score information.
            NOTE(review): the dictionary's keys, and the result when
            enable_scoring is False, are not specified here; confirm.
        """

SemanticRetriever

class SemanticRetriever(BaseRetriever):
    """
    Retriever that uses embeddings for retrieval.

    Subclass of BaseRetriever; only the constructor is listed in this
    reference.
    """

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        enable_scoring: bool = True
    ):
        """
        Initialize a semantic retriever that uses embeddings for retrieval.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings. Defaults to
                "openai:text-embedding-3-small".
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

KeywordRetriever

class KeywordRetriever(BaseRetriever):
    """
    Retriever that uses keyword matching for retrieval.

    Subclass of BaseRetriever; only the constructor is listed in this
    reference.
    """

    def __init__(
        self,
        documents: List[Document],
        use_stemming: bool = True,
        enable_scoring: bool = True
    ):
        """
        Initialize a keyword retriever that uses keyword matching for retrieval.

        Args:
            documents: List of documents to retrieve from.
            use_stemming: Whether to use stemming for keyword matching.
                Defaults to True.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

HybridRetriever

class HybridRetriever(BaseRetriever):
    """
    Retriever that combines semantic and keyword retrieval.

    Subclass of BaseRetriever; only the constructor is listed in this
    reference.
    """

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        semantic_weight: float = 0.7,
        enable_scoring: bool = True
    ):
        """
        Initialize a hybrid retriever that combines semantic and keyword retrieval.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings. Defaults to
                "openai:text-embedding-3-small".
            semantic_weight: Weight to give to semantic retrieval
                (0.0-1.0). Defaults to 0.7. NOTE(review): presumably
                keyword retrieval receives the remaining weight, and
                handling of out-of-range values is not shown; confirm.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

ContextualRetriever

class ContextualRetriever(BaseRetriever):
    """
    Retriever that adapts to conversation context.

    Subclass of BaseRetriever that adds methods for recording and
    clearing conversation messages. Only signatures are listed in this
    reference.
    """

    def __init__(
        self,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        context_window_size: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a contextual retriever that adapts to conversation context.

        Args:
            documents: List of documents to retrieve from.
            embedding_model: Model to use for embeddings. Defaults to
                "openai:text-embedding-3-small".
            context_window_size: Number of previous exchanges to consider
                as context. Defaults to 3.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

    def add_context(self, message: str, role: str = "user") -> None:
        """
        Add a message to the conversation context.

        Args:
            message: Message to add.
            role: Role of the message sender (e.g., "user", "assistant").
                Defaults to "user".
        """

    def clear_context(self) -> None:
        """Discard the accumulated conversation context."""

RAG Systems

SimpleRAG

class SimpleRAG:
    """
    A simple RAG system: pairs an agent used for generation with a set of
    documents used for retrieval.

    Only the signatures are listed in this reference; method bodies are
    not shown.
    """

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        retriever_type: str = "semantic",
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a simple RAG system.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            retriever_type: Type of retriever to use ("semantic",
                "keyword", "hybrid"). Defaults to "semantic".
                NOTE(review): behavior for any other value is not shown
                here; confirm.
            top_k: Number of documents to retrieve. Defaults to 3.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

    async def run(self, query: str) -> str:
        """
        Run the RAG system asynchronously.

        Declared with ``async def``, so the call must be awaited.

        Args:
            query: Query to process.

        Returns:
            Generated response.
        """

    def run_sync(self, query: str) -> str:
        """
        Run the RAG system synchronously.

        Takes the same argument as run() but is a regular (non-async)
        method.

        Args:
            query: Query to process.

        Returns:
            Generated response.
        """

    async def stream(self, query: str) -> AsyncIterator[str]:
        """
        Stream the RAG system's response asynchronously.

        Annotated as returning an AsyncIterator[str]; presumably consumed
        with ``async for``. Confirm against the implementation.

        Args:
            query: Query to process.

        Yields:
            Chunks of the generated response.
        """

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the RAG system.

        Args:
            document: Document to add.
        """

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the RAG system.

        Args:
            documents: List of documents to add.
        """

    def get_complexity_score(self) -> Dict[str, Any]:
        """
        Get the complexity score for the RAG system.

        Returns:
            Dictionary containing complexity score information.
            NOTE(review): the dictionary's keys, and the result when
            enable_scoring is False, are not specified here; confirm.
        """

SemanticRAG

class SemanticRAG(SimpleRAG):
    """
    Semantic RAG system with advanced embedding features.

    Subclass of SimpleRAG. Its constructor takes an embedding_model and,
    unlike SimpleRAG's, has no retriever_type parameter. Only the
    constructor is listed in this reference.
    """

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a semantic RAG system with advanced embedding features.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            embedding_model: Model to use for embeddings. Defaults to
                "openai:text-embedding-3-small".
            top_k: Number of documents to retrieve. Defaults to 3.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

ContextualRAG

class ContextualRAG(SimpleRAG):
    """
    Contextual RAG system that adapts to conversation context.

    Subclass of SimpleRAG. Its constructor takes an embedding_model and a
    context_window_size and, unlike SimpleRAG's, has no retriever_type
    parameter. Only the constructor is listed in this reference.
    """

    def __init__(
        self,
        agent: Agent,
        documents: List[Document],
        embedding_model: str = "openai:text-embedding-3-small",
        context_window_size: int = 3,
        top_k: int = 3,
        enable_scoring: bool = True
    ):
        """
        Initialize a contextual RAG system that adapts to conversation context.

        Args:
            agent: Agent to use for generation.
            documents: List of documents for retrieval.
            embedding_model: Model to use for embeddings. Defaults to
                "openai:text-embedding-3-small".
            context_window_size: Number of previous exchanges to consider
                as context. Defaults to 3.
            top_k: Number of documents to retrieve. Defaults to 3.
            enable_scoring: Whether to track complexity scoring.
                Defaults to True.
        """

RAG Factories

create_rag_system

def create_rag_system(
    agent: Agent,
    documents: List[Document],
    rag_type: str = "simple",
    **kwargs
) -> Union[SimpleRAG, SemanticRAG, ContextualRAG]:
    """
    Factory function to create different types of RAG systems.

    Args:
        agent: Agent to use for generation.
        documents: List of documents for retrieval.
        rag_type: Type of RAG system to create ("simple", "semantic",
            "contextual"). Defaults to "simple". NOTE(review): behavior
            for any other value is not shown here; confirm.
        **kwargs: Additional arguments specific to the RAG type.
            Presumably forwarded to the selected class's constructor
            (e.g. top_k, embedding_model); confirm.

    Returns:
        Instantiated RAG system of the specified type (a SimpleRAG,
        SemanticRAG, or ContextualRAG, per the return annotation).
    """