Skip to content

TextFile

TextFile inherits from File and adds methods for working with text files.

A TextFile is generated when a DataChain is created from storage with the type="text" parameter:

from datachain import DataChain

# Passing type="text" makes the chain created from storage generate TextFile objects.
dc = DataChain.from_storage("s3://bucket-name/", type="text")

TextFile

TextFile(**kwargs)

Bases: File

DataModel for reading text files.

Source code in datachain/lib/file.py
def __init__(self, **kwargs):
    """Initialize the file from keyword arguments.

    All keyword arguments are forwarded unchanged to the parent
    initializer. Afterwards the per-instance state is reset: caching is
    disabled and no catalog is attached.
    """
    super().__init__(**kwargs)
    # Caching stays off until something explicitly enables it.
    self._caching_enabled: bool = False
    # No catalog is attached at construction time.
    self._catalog = None

open

open(mode: Literal['rb', 'r'] = 'r')

Open the file and return a file object (default to text mode).

Source code in datachain/lib/file.py
@contextmanager
def open(self, mode: Literal["rb", "r"] = "r"):
    """Context manager yielding an open file object for this file.

    Unlike the bare signature might suggest, the default here is text
    mode (``"r"``); pass ``"rb"`` to get a binary stream instead. The
    actual opening is delegated to the parent class's ``open``.

    Args:
        mode: Either ``"r"`` (text, default) or ``"rb"`` (binary).

    Yields:
        The file object produced by the parent class's ``open``.
    """
    parent_cm = super().open(mode=mode)
    with parent_cm as handle:
        yield handle

read_text

read_text()

Returns file contents as text.

Source code in datachain/lib/file.py
def read_text(self):
    """Read the whole file and return its contents.

    The file is opened through ``self.open()`` with its default mode and
    read in a single call.

    Returns:
        Whatever ``read()`` on the opened stream returns.
    """
    with self.open() as handle:
        contents = handle.read()
        return contents

save

save(destination: str)

Writes its content to destination.

Source code in datachain/lib/file.py
def save(self, destination: str):
    """Write this file's text content to ``destination``.

    Args:
        destination: Target path or URI. It is normalized with
            ``stringify_path`` before being used.

    Raises:
        RuntimeError: If no catalog is attached to this file, so no
            client can be resolved for the destination.
    """
    # ``_catalog`` is None until one is attached; fail with a clear message
    # rather than an opaque AttributeError on ``None.get_client``.
    if self._catalog is None:
        raise RuntimeError("Cannot save file: catalog is not set")

    destination = stringify_path(destination)

    # The catalog picks the client matching the destination.
    client: Client = self._catalog.get_client(destination)
    with client.fs.open(destination, mode="w") as f:
        f.write(self.read_text())