Skip to content

ArrowRow

ArrowRow

Bases: DataModel

DataModel for reading a single row from an Arrow-supported file.

open

open()

Stream the row's contents from the indexed file.

Source code in datachain/lib/file.py
@contextmanager
def open(self):
    """Stream row contents from indexed file.

    Builds a pyarrow dataset over the row's file, takes the single row at
    ``self.index`` and yields the first record batch of the resulting reader.
    When caching is enabled on the file, the dataset is built from the local
    cached copy; otherwise it is built from the file's path on the file's own
    filesystem. ``self.kwargs`` is forwarded to ``pyarrow.dataset.dataset``
    in both cases.

    Yields:
        The record batch holding the selected row.

    Raises:
        RuntimeError: If the reader produces no record batch.
    """
    # Function-scope import: pyarrow is only loaded when a row is opened.
    from pyarrow.dataset import dataset

    if self.file._caching_enabled:
        self.file.ensure_cached()
        ds = dataset(self.file.get_local_path(), **self.kwargs)
    else:
        ds = dataset(
            self.file.get_path(),
            filesystem=self.file.get_fs(),
            **self.kwargs,
        )

    reader = ds.take([self.index]).to_reader()
    # ``contextmanager`` needs a generator function. Returning the reader only
    # worked because the reader is itself an iterator, and it broke error
    # handling: an exception raised inside the ``with`` body made contextlib
    # call ``reader.throw(...)``, which does not exist, masking the real error
    # with an AttributeError. Yielding the first batch keeps the value callers
    # receive unchanged while letting exceptions propagate normally.
    yield next(reader)

read

read()

Returns the row's contents as a dict.

Source code in datachain/lib/file.py
def read(self):
    """Return this row's contents as a dict.

    Opens the row via ``self.open()``, converts the yielded record batch to
    a list of Python dicts and returns the first entry.
    """
    with self.open() as batch:
        rows = batch.to_pylist()
        # Indexing stays inside the ``with`` block so that a failure here is
        # still seen by the context manager, exactly as before.
        first_row = rows[0]
        return first_row