Skip to content

tilecache

darts_utils.tilecache

Caching functionality for xarray datasets.

logger module-attribute

# Module-level logger. Every "darts_" in the module name is replaced by "darts."
# (e.g. "darts_utils.tilecache" -> "darts.utils.tilecache"), presumably so that all
# darts packages log under one shared "darts" logger hierarchy — confirm.
logger = logging.getLogger(
    __name__.replace("darts_", "darts.")
)

manager module-attribute

# Module-level BandManager: maps band names to the BandCodec describing how each band
# is stored. The cache manager's load/save methods use it via `manager.open(...)` and
# `manager.to_netcdf(...)` when `use_band_manager` is True.
# NOTE(review): keys ending in "-*" look like wildcard patterns for suffixed band
# names — confirm against BandManager.
manager = darts_utils.bands.BandManager(
    {
        "blue": darts_utils.bands.BandCodec.optical(),
        "red": darts_utils.bands.BandCodec.optical(),
        "green": darts_utils.bands.BandCodec.optical(),
        "nir": darts_utils.bands.BandCodec.optical(),
        "s2_scl": darts_utils.bands.BandCodec.mask(11),
        "planet_udm": darts_utils.bands.BandCodec.mask(8),
        "quality_data_mask": darts_utils.bands.BandCodec.mask(
            2
        ),
        # NOTE(review): disk_dtype is float32 here, yet scale_factor/offset/fill_value
        # are set like the int16-packed bands below — confirm this is intended.
        "dem": darts_utils.bands.BandCodec(
            disk_dtype="float32",
            memory_dtype="float32",
            valid_range=(-100, 3000),
            scale_factor=0.1,
            offset=-100.0,
            fill_value=-1,
        ),
        "arcticdem_data_mask": darts_utils.bands.BandCodec(
            disk_dtype="bool",
            memory_dtype="uint8",
            valid_range=(0, 1),
        ),
        "tc_brightness": darts_utils.bands.BandCodec.tc(),
        "tc_greenness": darts_utils.bands.BandCodec.tc(),
        "tc_wetness": darts_utils.bands.BandCodec.tc(),
        "ndvi": darts_utils.bands.BandCodec.ndi(),
        # The explicit codecs below keep float32 in memory and int16 on disk, each
        # with its own valid range, scale factor and offset; -1 is the fill value.
        "relative_elevation": darts_utils.bands.BandCodec(
            disk_dtype="int16",
            memory_dtype="float32",
            valid_range=(-50, 50),
            scale_factor=100 / 30000,
            offset=-50.0,
            fill_value=-1,
        ),
        "slope": darts_utils.bands.BandCodec(
            disk_dtype="int16",
            memory_dtype="float32",
            valid_range=(0, 90),
            scale_factor=1 / 100,
            offset=0.0,
            fill_value=-1,
        ),
        "aspect": darts_utils.bands.BandCodec(
            disk_dtype="int16",
            memory_dtype="float32",
            valid_range=(0, 360),
            scale_factor=1 / 10,
            offset=0.0,
            fill_value=-1,
        ),
        "hillshade": darts_utils.bands.BandCodec(
            disk_dtype="int16",
            memory_dtype="float32",
            valid_range=(0, 1),
            scale_factor=1 / 10000,
            offset=0.0,
            fill_value=-1,
        ),
        "curvature": darts_utils.bands.BandCodec.ndi(),
        "probabilities": darts_utils.bands.BandCodec.percentage(),
        "probabilities-*": darts_utils.bands.BandCodec.percentage(),
        "binarized_segmentation": darts_utils.bands.BandCodec.bool(),
        "binarized_segmentation-*": darts_utils.bands.BandCodec.bool(),
        "extent": darts_utils.bands.BandCodec.bool(),
    }
)

XarrayCacheManager

XarrayCacheManager(
    cache_dir: str | pathlib.Path | None = None,
)

Manager for caching xarray datasets.

Example
    def process_tile(tile_id: str):
        # Initialize cache manager
        preprocess_cache = Path("preprocess_cache")
        cache_manager = XarrayCacheManager(preprocess_cache)

        def build_tile():
            # Your existing tile creation logic goes here
            return create_tile(...)  # Replace with actual implementation

        # Get cached tile or create and cache it
        tile = cache_manager.get_or_create(
            identifier=tile_id,
            creation_func=build_tile,
            force=False,  # Required argument: pass True to ignore an existing cache entry
        )

        return tile

Initialize the cache manager.

Parameters:

  • cache_dir (str | pathlib.Path | None, default: None ) –

    Directory path for caching files

Source code in darts-utils/src/darts_utils/tilecache.py
def __init__(self, cache_dir: str | Path | None = None):
    """Set up the manager with an optional cache location.

    Args:
        cache_dir (str | Path | None): Directory path for caching files. A string is
            converted to a ``Path``; any other value (including ``None``) is stored as given.

    """
    if isinstance(cache_dir, str):
        cache_dir = Path(cache_dir)
    self.cache_dir = cache_dir

cache_dir instance-attribute

exists

exists(identifier: str) -> bool

Check if a cached Dataset exists.

Parameters:

  • identifier (str) –

    Unique identifier for the cached file

Returns:

  • bool ( bool ) –

    True if the Dataset exists in cache, False otherwise

Source code in darts-utils/src/darts_utils/tilecache.py
def exists(self, identifier: str) -> bool:
    """Report whether a cache file is present for the given identifier.

    Args:
        identifier (str): Unique identifier for the cached file

    Returns:
        bool: True if ``<cache_dir>/<identifier>.nc`` exists, False if it does not
            or if no cache directory is configured

    """
    cache_root = self.cache_dir
    if cache_root:
        return (cache_root / f"{identifier}.nc").exists()
    # No cache directory configured: nothing can be cached.
    return False

get_or_create

get_or_create(
    identifier: str,
    creation_func: callable,
    force: bool,
    use_band_manager: bool = True,
    *args: tuple[typing.Any, ...],
    **kwargs: dict[str, typing.Any],
) -> xarray.Dataset

Get cached Dataset or create and cache it if it doesn't exist.

Parameters:

  • identifier (str) –

    Unique identifier for the cached file

  • creation_func (callable) –

    Function to create the Dataset if not cached

  • force (bool) –

    If True, forces reprocessing even if cached

  • use_band_manager (bool, default: True ) –

    If True, uses the band manager to save and load the data. Defaults to True.

  • *args (tuple[typing.Any, ...], default: () ) –

    Arguments to pass to creation_func

  • **kwargs (dict[str, typing.Any], default: {} ) –

    Keyword arguments to pass to creation_func

Returns:

  • xarray.Dataset

    xr.Dataset: The Dataset (either loaded from cache or newly created)

Source code in darts-utils/src/darts_utils/tilecache.py
def get_or_create(
    self,
    identifier: str,
    creation_func: callable,
    force: bool,
    use_band_manager: bool = True,
    *args: tuple[Any, ...],
    **kwargs: dict[str, Any],
) -> xr.Dataset:
    """Return the cached Dataset, or build it with ``creation_func`` and cache the result.

    Args:
        identifier (str): Unique identifier for the cached file
        creation_func (callable): Function to create the Dataset if not cached
        force (bool): If True, skips the cache lookup and always recreates the Dataset
        use_band_manager (bool): If True, uses the band manager to save and load the data.
            Defaults to True.
        *args: Arguments to pass to creation_func
        **kwargs: Keyword arguments to pass to creation_func

    Returns:
        xr.Dataset: The Dataset (either loaded from cache or newly created)

    """
    if not force:
        # Only consult the cache (and report the lookup result) when not forced.
        cached_dataset = self.load_from_cache(identifier, use_band_manager)
        logger.debug(f"Cache hit for '{identifier}': {cached_dataset is not None}")
        if cached_dataset is not None:
            return cached_dataset

    # Cache miss or forced rebuild: create the Dataset and store it for next time.
    created = creation_func(*args, **kwargs)
    self.save_to_cache(created, identifier, use_band_manager)
    return created

load_from_cache

load_from_cache(
    identifier: str, use_band_manager: bool = True
) -> xarray.Dataset | None

Load a Dataset from cache if it exists.

Parameters:

  • identifier (str) –

    Unique identifier for the cached file

  • use_band_manager (bool, default: True ) –

    If True, uses the band manager to load the data. Defaults to True.

Returns:

  • xarray.Dataset | None

    xr.Dataset | None: Dataset if found in cache, otherwise None

Source code in darts-utils/src/darts_utils/tilecache.py
def load_from_cache(self, identifier: str, use_band_manager: bool = True) -> xr.Dataset | None:
    """Load a Dataset from cache if it exists.

    Args:
        identifier (str): Unique identifier for the cached file
        use_band_manager (bool): If True, uses the band manager to load the data. Defaults to True.

    Returns:
        xr.Dataset | None: Dataset if found in cache, otherwise None (also None when no
            cache directory is configured)

    """
    if not self.cache_dir:
        return None

    cache_path = self.cache_dir / f"{identifier}.nc"
    if not cache_path.exists():
        return None

    if use_band_manager:
        return manager.open(cache_path)

    # decode_coords="all" was observed to fail intermittently (cause unknown), so the
    # "spatial_ref" variable is promoted to a coordinate manually instead.
    # The context manager closes the underlying file once the data has been loaded
    # into memory, so no open handle is left behind on the cache file.
    with xr.open_dataset(cache_path, engine="h5netcdf") as on_disk:
        return on_disk.set_coords("spatial_ref").load()

save_to_cache

save_to_cache(
    dataset: xarray.Dataset,
    identifier: str,
    use_band_manager: bool = True,
) -> bool

Save a Dataset to cache.

Parameters:

  • dataset (xarray.Dataset) –

    Dataset to cache

  • identifier (str) –

    Unique identifier for the cached file

  • use_band_manager (bool, default: True ) –

    If True, uses the band manager to save the data. Defaults to True.

Returns:

  • bool ( bool ) –

    Success of operation

Source code in darts-utils/src/darts_utils/tilecache.py
def save_to_cache(self, dataset: xr.Dataset, identifier: str, use_band_manager: bool = True) -> bool:
    """Write a Dataset to ``<cache_dir>/<identifier>.nc``.

    Args:
        dataset (xr.Dataset): Dataset to cache
        identifier (str): Unique identifier for the cached file
        use_band_manager (bool): If True, uses the band manager to save the data. Defaults to True.

    Returns:
        bool: True once the Dataset has been written, False if no cache directory is configured

    """
    cache_root = self.cache_dir
    if not cache_root:
        return False

    # Create the cache directory (and any missing parents) on first use.
    cache_root.mkdir(parents=True, exist_ok=True)
    cache_path = cache_root / f"{identifier}.nc"
    logger.debug(f"Caching {identifier=} to {cache_path}")

    if not use_band_manager:
        dataset.to_netcdf(cache_path, engine="h5netcdf")
        return True

    manager.to_netcdf(dataset, cache_path)
    return True