Skip to content

Commit

Permalink
Fix integration tests, update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
akonarski-ds committed Oct 3, 2024
1 parent 52e5a94 commit 53edf48
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async def ingest_document(
Ingest a document.
Args:
document: The document or document metadata to ingest.
document: The document or metadata of the document to ingest.
document_processor: The document processor to use. If not provided, the document processor will be
determined based on the document metadata.
"""
Expand All @@ -98,10 +98,10 @@ async def ingest_document(

async def insert_elements(self, elements: list[Element]) -> None:
"""
Insert an elements into the vector store.
Insert Elements into the vector store.
Args:
elements: The element to insert.
elements: The list of Elements to insert.
"""
vectors = await self.embedder.embed_text([element.get_key() for element in elements])
entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors)]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ragbits

Repository for internal experiment with our upcoming LLM framework.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.ingestion.document_processor import DocumentProcessor
from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
from ragbits.document_search.ingestion.providers.unstructured import (
DEFAULT_PARTITION_KWARGS,
UNSTRUCTURED_API_KEY_ENV,
Expand All @@ -19,28 +19,28 @@
reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_text_document_with_unstructured_provider():
document_processor = DocumentProcessor.from_config()
document_processor = DocumentProcessorRouter.from_config()
document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")

elements = await document_processor.process(document_meta)
elements = await document_processor.get_provider(document_meta).process(document_meta)

assert isinstance(document_processor._providers[DocumentType.TXT], UnstructuredProvider)
assert len(elements) == 1
assert elements[0].content == "Name of Peppa's brother is George"
assert elements[0].content == "Name of Peppa's brother is George."


@pytest.mark.skipif(
env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_md_document_with_unstructured_provider():
document_processor = DocumentProcessor.from_config()
document_meta = DocumentMeta.from_local_path(Path(__file__).parent.parent.parent.parent.parent / "README.md")
document_processor = DocumentProcessorRouter.from_config()
document_meta = DocumentMeta.from_local_path(Path(__file__).parent / "test_file.md")

elements = await document_processor.process(document_meta)
elements = await document_processor.get_provider(document_meta).process(document_meta)

assert len(elements) > 0
assert elements[0].content == "Ragbits"
assert len(elements) == 1
assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework."


@pytest.mark.skipif(
Expand Down

0 comments on commit 53edf48

Please sign in to comment.