kd.data.py.HuggingFace

kd.data.py.HuggingFace

class kauldron.data.py.HuggingFace(path: str, config: str | None = None, *, _fake_refs: type[_FakeRefsUnset] | dict[str, _FakeRootCfg] = <class 'kauldron.utils.config_util._FakeRefsUnset'>, batch_size: int | None = None, seed: int | collections.abc.Sequence[int] | numpy.ndarray | jaxtyping.UInt32[Array, '2'] | jaxtyping.UInt32[ndarray, '2'] | jax.Array | None = _FakeRootCfg('cfg.seed'), transforms: tr_normalize.Transformations = <factory>, num_epochs: Optional[int] = None, batch_drop_remainder: bool = True, num_workers: int = 16, read_options: grain.ReadOptions | None = None, enable_profiling: bool = False, per_worker_buffer_size: int = 1, worker_init_fn: Callable[[int, int], None] | None = None, shuffle: bool, split: str, data_dir: epath.PathLike | None = None, cache_dir: epath.PathLike | None = None)

Bases: kauldron.data.py.base.DataSourceBase

HuggingFace loader.

path: str
config: str | None = None
split: str
data_dir: epath.PathLike | None = None
cache_dir: epath.PathLike | None = None
property data_source: grain._src.python.data_sources.RandomAccessDataSource