Skip to content

TarVFile

TarVFile

Bases: VFile

Virtual file model for files extracted from tar archives.

open classmethod

open(file: File, location: list[dict])

Stream a file from a tar archive based on its location in the archive.

Source code in datachain/lib/file.py
@classmethod
def open(cls, file: "File", location: list[dict]):
    """Stream a file from a tar archive based on its location in the archive.

    Args:
        file: The virtual file being opened. Its catalog, caching flag and
            name are used to open the parent archive and build the slice.
        location: Location entries for the file. Exactly one entry is
            supported; it must provide non-None "offset", "size" and
            "parent" values. "parent" is unpacked as keyword arguments to
            construct the parent archive ``File``.

    Returns:
        A ``FileSlice`` over the opened parent archive object, built from
        the entry's offset and size and the name of ``file``.

    Raises:
        VFileError: If ``location`` is empty or has more than one entry, or
            if "offset", "size" or "parent" is missing or None.
    """
    if not location:
        # Guard explicitly: indexing an empty list below would otherwise
        # surface as a bare IndexError instead of the VFileError used for
        # every other malformed-location case.
        raise VFileError(file, "'location' is not specified")

    if len(location) > 1:
        raise VFileError(file, "multiple 'location's are not supported yet")

    loc = location[0]

    if (offset := loc.get("offset")) is None:
        raise VFileError(file, "'offset' is not specified")

    if (size := loc.get("size")) is None:
        raise VFileError(file, "'size' is not specified")

    if (parent := loc.get("parent")) is None:
        raise VFileError(file, "'parent' is not specified")

    # Rebuild the parent archive as a File and attach it to the same catalog
    # as the virtual file so it can be opened through that catalog's client.
    tar_file = File(**parent)
    tar_file._set_stream(file._catalog)

    client = file._catalog.get_client(tar_file.source)
    fd = client.open_object(tar_file, use_cache=file._caching_enabled)
    return FileSlice(fd, offset, size, file.name)