Stream a file from a tar archive based on its location in the archive.
Source code in datachain/lib/file.py
| @classmethod
def open(cls, file: "File", location: list[dict]):
    """Stream a file from a tar archive based on its location in the archive.

    Args:
        file: The file being opened. Its ``_catalog`` is used to reach the
            parent archive, its ``_caching_enabled`` flag is forwarded as
            ``use_cache``, and its ``name`` is passed to the returned slice.
        location: A list holding exactly one mapping with the keys
            ``"offset"``, ``"size"`` and ``"parent"``. ``"parent"`` is
            expanded as keyword arguments to ``File`` to describe the
            archive that contains the member.

    Returns:
        A ``FileSlice`` built from the opened parent object, the given
        offset and size, and ``file.name``.

    Raises:
        VFileError: If ``location`` is empty, has more than one entry, or
            its entry lacks ``"offset"``, ``"size"`` or ``"parent"``.
    """
    if not location:
        # Without this guard ``location[0]`` below would surface as a bare
        # IndexError instead of the VFileError used for every other
        # malformed-location case.
        raise VFileError(file, "'location' is not specified")
    if len(location) > 1:
        raise VFileError(file, "multiple 'location's are not supported yet")

    loc = location[0]

    # Compare against None explicitly: a falsy value such as an offset of 0
    # must still be accepted.
    # NOTE(review): offset/size are presumably byte positions within the
    # parent archive - confirm against FileSlice.
    if (offset := loc.get("offset")) is None:
        raise VFileError(file, "'offset' is not specified")
    if (size := loc.get("size")) is None:
        raise VFileError(file, "'size' is not specified")
    if (parent := loc.get("parent")) is None:
        raise VFileError(file, "'parent' is not specified")

    # Rebuild the enclosing archive as a File and attach the same catalog
    # the requested file uses.
    tar_file = File(**parent)
    tar_file._set_stream(file._catalog)

    client = file._catalog.get_client(tar_file.source)
    fd = client.open_object(tar_file, use_cache=file._caching_enabled)
    return FileSlice(fd, offset, size, file.name)
|