Skip to content

darts_preprocessing.legacy

PLANET scene based preprocessing.

logger module-attribute

logger = logging.getLogger(
    __name__.replace("darts_", "darts.")
)

calculate_ndvi

calculate_ndvi(optical: xarray.Dataset) -> xarray.Dataset

Calculate NDVI (Normalized Difference Vegetation Index) from spectral bands.

NDVI is a widely-used vegetation index that indicates photosynthetic activity and vegetation health. Values range from -1 to 1, with higher values indicating denser, healthier vegetation.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - nir (float32): Near-infrared reflectance [0-1] - red (float32): Red reflectance [0-1]

Returns:

  • xarray.Dataset

    xr.DataArray: NDVI values with attributes: - long_name: "NDVI"

Note

Formula: NDVI = (NIR - Red) / (NIR + Red)

Input bands are clipped to [0, 1] before calculation to avoid numerical instabilities from negative reflectance values or sensor artifacts. The final result is also clipped to ensure values remain in the valid [-1, 1] range.

Example

Calculate NDVI from optical data:

from darts_preprocessing import calculate_ndvi

# optical contains 'nir' and 'red' bands
ndvi = calculate_ndvi(optical)

# Mask vegetation
vegetation_mask = ndvi > 0.3
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating NDVI", printer=logger.debug)
def calculate_ndvi(optical: xr.Dataset) -> xr.Dataset:
    """Calculate NDVI (Normalized Difference Vegetation Index) from spectral bands.

    NDVI is a widely-used vegetation index that indicates photosynthetic activity and
    vegetation health. Values range from -1 to 1, with higher values indicating denser,
    healthier vegetation.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - nir (float32): Near-infrared reflectance [0-1]
            - red (float32): Red reflectance [0-1]

    Returns:
        xr.DataArray: NDVI values with attributes:
            - long_name: "NDVI"

    Note:
        Formula: NDVI = (NIR - Red) / (NIR + Red)

        Input bands are clipped to [0, 1] before calculation to avoid numerical instabilities
        from negative reflectance values or sensor artifacts. The final result is also clipped
        to ensure values remain in the valid [-1, 1] range.

    Example:
        Calculate NDVI from optical data:

        ```python
        from darts_preprocessing import calculate_ndvi

        # optical contains 'nir' and 'red' bands
        ndvi = calculate_ndvi(optical)

        # Mask vegetation
        vegetation_mask = ndvi > 0.3
        ```

    """
    nir = optical["nir"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    ndvi = (nir - r) / (nir + r)
    ndvi = ndvi.clip(-1, 1)
    ndvi = ndvi.assign_attrs({"long_name": "NDVI"}).rename("ndvi")
    return ndvi

calculate_slope

calculate_slope(
    arcticdem_ds: xarray.Dataset,
) -> xarray.Dataset

Calculate slope of the terrain surface from an ArcticDEM Dataset.

Slope represents the rate of change of elevation, indicating terrain steepness.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • slope (float32): Slope in degrees [0-90].

    • long_name: "Slope"

    • units: "degrees"
    • source: "ArcticDEM"
Note

Slope is calculated using finite difference methods on the DEM. Values approaching 90° indicate near-vertical terrain.

Example
from darts_preprocessing import calculate_slope

arcticdem_with_slope = calculate_slope(arcticdem_ds)

# Mask steep terrain
steep_areas = arcticdem_with_slope.slope > 30
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating slope", printer=logger.debug)
def calculate_slope(arcticdem_ds: xr.Dataset) -> xr.Dataset:
    """Calculate slope of the terrain surface from an ArcticDEM Dataset.

    Slope represents the rate of change of elevation, indicating terrain steepness.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - slope (float32): Slope in degrees [0-90].

            - long_name: "Slope"
            - units: "degrees"
            - source: "ArcticDEM"

    Note:
        Slope is calculated using finite difference methods on the DEM.
        Values approaching 90° indicate near-vertical terrain.

    Example:
        ```python
        from darts_preprocessing import calculate_slope

        arcticdem_with_slope = calculate_slope(arcticdem_ds)

        # Mask steep terrain
        steep_areas = arcticdem_with_slope.slope > 30
        ```

    """
    slope_deg = slope(arcticdem_ds.dem)
    slope_deg.attrs = {
        "long_name": "Slope",
        "units": "degrees",
        "description": "The slope of the terrain surface in degrees.",
        "source": "ArcticDEM",
    }
    arcticdem_ds["slope"] = slope_deg.compute()
    return arcticdem_ds

calculate_topographic_position_index

calculate_topographic_position_index(
    arcticdem_ds: xarray.Dataset,
    outer_radius: int,
    inner_radius: int,
) -> xarray.Dataset

Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

TPI measures the relative topographic position of a point by comparing its elevation to the mean elevation of the surrounding neighborhood. Positive values indicate higher positions (ridges), negative values indicate lower positions (valleys).

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    The ArcticDEM Dataset containing the 'dem' variable (float32).

  • outer_radius (int) –

    The outer radius of the neighborhood in meters. Can also be specified as string with units (e.g., "100m" or "10px").

  • inner_radius (int) –

    The inner radius of the annulus kernel in meters. If > 0, creates an annulus (ring) instead of a circle. Set to 0 for a circular kernel. Can also be specified as string with units (e.g., "50m" or "5px").

Returns:

  • xarray.Dataset

    xr.Dataset: The input Dataset with a new data variable added:

  • xarray.Dataset
    • tpi (float32): Topographic Position Index values.

    • long_name: "Topographic Position Index (TPI)"

    • description: Details about the kernel used
Note

Kernel shape combinations:

  • inner_radius=0: Circular kernel comparing each cell to all neighbors within outer_radius
  • inner_radius>0: Annulus kernel comparing each cell to neighbors in a ring between inner_radius and outer_radius. Useful for multi-scale terrain analysis.

The actual radii used are rounded to the nearest pixel based on the DEM resolution.

Example

Calculate TPI with circular and annulus kernels:

from darts_preprocessing import calculate_topographic_position_index

# Circular kernel (100m radius)
arcticdem_with_tpi = calculate_topographic_position_index(
    arcticdem_ds=arcticdem,
    outer_radius=100,
    inner_radius=0
)

# Annulus kernel (50-100m ring)
arcticdem_multi_scale = calculate_topographic_position_index(
    arcticdem_ds=arcticdem,
    outer_radius=100,
    inner_radius=50
)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch.f("Calculating TPI", printer=logger.debug, print_kwargs=["outer_radius", "inner_radius"])
def calculate_topographic_position_index(arcticdem_ds: xr.Dataset, outer_radius: int, inner_radius: int) -> xr.Dataset:
    """Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

    TPI measures the relative topographic position of a point by comparing its elevation to
    the mean elevation of the surrounding neighborhood. Positive values indicate higher
    positions (ridges), negative values indicate lower positions (valleys).

    Args:
        arcticdem_ds (xr.Dataset): The ArcticDEM Dataset containing the 'dem' variable (float32).
        outer_radius (int): The outer radius of the neighborhood in meters.
            Can also be specified as string with units (e.g., "100m" or "10px").
        inner_radius (int): The inner radius of the annulus kernel in meters.
            If > 0, creates an annulus (ring) instead of a circle. Set to 0 for a circular kernel.
            Can also be specified as string with units (e.g., "50m" or "5px").

    Returns:
        xr.Dataset: The input Dataset with a new data variable added:

        - tpi (float32): Topographic Position Index values.

            - long_name: "Topographic Position Index (TPI)"
            - description: Details about the kernel used

    Note:
        Kernel shape combinations:

        - inner_radius=0: Circular kernel comparing each cell to all neighbors within outer_radius
        - inner_radius>0: Annulus kernel comparing each cell to neighbors in a ring between
          inner_radius and outer_radius. Useful for multi-scale terrain analysis.

        The actual radii used are rounded to the nearest pixel based on the DEM resolution.

    Example:
        Calculate TPI with circular and annulus kernels:

        ```python
        from darts_preprocessing import calculate_topographic_position_index

        # Circular kernel (100m radius)
        arcticdem_with_tpi = calculate_topographic_position_index(
            arcticdem_ds=arcticdem,
            outer_radius=100,
            inner_radius=0
        )

        # Annulus kernel (50-100m ring)
        arcticdem_multi_scale = calculate_topographic_position_index(
            arcticdem_ds=arcticdem,
            outer_radius=100,
            inner_radius=50
        )
        ```

    """
    cellsize_x, cellsize_y = convolution.calc_cellsize(arcticdem_ds.dem)  # Should be equal to the resolution of the DEM
    # Use an annulus kernel if inner_radius is greater than 0
    outer_radius: Distance = Distance.parse(outer_radius, cellsize_x)
    if inner_radius > 0:
        inner_radius: Distance = Distance.parse(inner_radius, cellsize_x)
        kernel = convolution.annulus_kernel(cellsize_x, cellsize_y, outer_radius.meter, inner_radius.meter)
        attr_cell_description = (
            f"within a ring at a distance of {inner_radius}-{outer_radius} cells away from the focal cell."
        )
        logger.debug(
            f"Calculating Topographic Position Index with annulus kernel of {inner_radius}-{outer_radius} cells."
        )
    else:
        kernel = convolution.circle_kernel(cellsize_x, cellsize_y, outer_radius.meter)
        attr_cell_description = f"within a circle at a distance of {outer_radius} cells away from the focal cell."
        logger.debug(f"Calculating Topographic Position Index with circle kernel of {outer_radius} cells.")

    # Change dtype of kernel to float32 since we don't need the precision and f32 is faster
    kernel = kernel.astype("float32")

    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        kernel = cp.asarray(kernel)

    tpi = arcticdem_ds.dem - convolution.convolution_2d(arcticdem_ds.dem, kernel) / kernel.sum()
    tpi.attrs = {
        "long_name": "Topographic Position Index",
        "units": "m",
        "description": "The difference between the elevation of a cell and the mean elevation of the surrounding"
        f"cells {attr_cell_description}",
        "source": "ArcticDEM",
    }

    arcticdem_ds["tpi"] = tpi.compute()

    return arcticdem_ds

preprocess_legacy_arcticdem_fast

preprocess_legacy_arcticdem_fast(
    ds_arcticdem: xarray.Dataset,
    tpi_outer_radius: int,
    tpi_inner_radius: int,
) -> xarray.Dataset

Preprocess the ArcticDEM data with legacy (DARTS v1) preprocessing steps.

Parameters:

  • ds_arcticdem (xarray.Dataset) –

    The ArcticDEM dataset.

  • tpi_outer_radius (int) –

    The outer radius of the annulus kernel for the tpi calculation in number of cells.

  • tpi_inner_radius (int) –

    The inner radius of the annulus kernel for the tpi calculation in number of cells.

Returns:

  • xarray.Dataset

    xr.Dataset: The preprocessed ArcticDEM dataset.

Source code in darts-preprocessing/src/darts_preprocessing/legacy.py
@stopwatch.f("Preprocessing arcticdem", printer=logger.debug, print_kwargs=["tpi_outer_radius", "tpi_inner_radius"])
def preprocess_legacy_arcticdem_fast(
    ds_arcticdem: xr.Dataset,
    tpi_outer_radius: int,
    tpi_inner_radius: int,
) -> xr.Dataset:
    """Preprocess the ArcticDEM data with legacy (DARTS v1) preprocessing steps.

    Args:
        ds_arcticdem (xr.Dataset): The ArcticDEM dataset.
        tpi_outer_radius (int): The outer radius of the annulus kernel for the tpi calculation in number of cells.
        tpi_inner_radius (int): The inner radius of the annulus kernel for the tpi calculation in number of cells.

    Returns:
        xr.Dataset: The preprocessed ArcticDEM dataset.

    """
    ds_arcticdem = calculate_topographic_position_index(ds_arcticdem, tpi_outer_radius, tpi_inner_radius)
    ds_arcticdem = calculate_slope(ds_arcticdem)

    return ds_arcticdem

preprocess_legacy_fast

preprocess_legacy_fast(
    ds_optical: xarray.Dataset,
    ds_arcticdem: xarray.Dataset,
    ds_tcvis: xarray.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_utils.cuda.DEFAULT_DEVICE,
) -> xarray.Dataset

Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

The processing steps are: - Calculate NDVI - Calculate slope and relative elevation from ArcticDEM - Merge everything into a single ds.

The main difference to preprocess_legacy is the new data concept of the arcticdem. Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw arcticdem data and calculates slope and relative elevation on the fly.

Parameters:

  • ds_optical (xarray.Dataset) –

    The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.

  • ds_arcticdem (xarray.Dataset) –

    The ArcticDEM dataset.

  • ds_tcvis (xarray.Dataset) –

    The TCVIS dataset.

  • tpi_outer_radius (int, default: 100 ) –

    The outer radius of the annulus kernel for the tpi calculation in m. Defaults to 100m.

  • tpi_inner_radius (int, default: 0 ) –

    The inner radius of the annulus kernel for the tpi calculation in m. Defaults to 0.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_utils.cuda.DEFAULT_DEVICE ) –

    The device to run the tpi and slope calculations on. If "cuda" take the first device (0), if int take the specified device. Defaults to "cuda" if cuda is available, else "cpu".

Returns:

Source code in darts-preprocessing/src/darts_preprocessing/legacy.py
@stopwatch("Preprocessing", printer=logger.debug)
def preprocess_legacy_fast(
    ds_optical: xr.Dataset,
    ds_arcticdem: xr.Dataset,
    ds_tcvis: xr.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

    The processing steps are:
    - Calculate NDVI
    - Calculate slope and relative elevation from ArcticDEM
    - Merge everything into a single ds.

    The main difference to preprocess_legacy is the new data concept of the arcticdem.
    Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw
    arcticdem data and calculates slope and relative elevation on the fly.

    Args:
        ds_optical (xr.Dataset): The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.
        ds_arcticdem (xr.Dataset): The ArcticDEM dataset.
        ds_tcvis (xr.Dataset): The TCVIS dataset.
        tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation
            in m. Defaults to 100m.
        tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation
            in m. Defaults to 0.
        device (Literal["cuda", "cpu"] | int, optional): The device to run the tpi and slope calculations on.
            If "cuda" take the first device (0), if int take the specified device.
            Defaults to "cuda" if cuda is available, else "cpu".

    Returns:
        xr.Dataset: The preprocessed dataset.

    """
    # Move to GPU for faster calculations
    ds_optical = move_to_device(ds_optical, device)
    # Calculate NDVI
    ds_optical["ndvi"] = calculate_ndvi(ds_optical)
    ds_optical = move_to_host(ds_optical)

    # Reproject TCVIS to optical data
    with stopwatch("Reprojecting TCVIS", printer=logger.debug):
        ds_tcvis = ds_tcvis.odc.reproject(ds_optical.odc.geobox, resampling="cubic")

    ds_optical["tc_brightness"] = ds_tcvis.tc_brightness
    ds_optical["tc_greenness"] = ds_tcvis.tc_greenness
    ds_optical["tc_wetness"] = ds_tcvis.tc_wetness

    # Calculate TPI and slope from ArcticDEM
    with stopwatch("Reprojecting ArcticDEM", printer=logger.debug):
        ds_arcticdem = ds_arcticdem.odc.reproject(ds_optical.odc.geobox.buffered(tpi_outer_radius), resampling="cubic")
    # Move to same device as optical
    ds_arcticdem = move_to_device(ds_arcticdem, device)
    ds_arcticdem = preprocess_legacy_arcticdem_fast(ds_arcticdem, tpi_outer_radius, tpi_inner_radius)
    ds_arcticdem = move_to_host(ds_arcticdem)

    ds_arcticdem = ds_arcticdem.odc.crop(ds_optical.odc.geobox.extent)
    # For some reason, we need to reindex, because the reproject + crop of the arcticdem sometimes results
    # in floating point errors. These error are at the order of 1e-10, hence, way below millimeter precision.
    ds_arcticdem = ds_arcticdem.reindex_like(ds_optical)

    ds_optical["dem"] = ds_arcticdem.dem
    ds_optical["relative_elevation"] = ds_arcticdem.tpi
    ds_optical["slope"] = ds_arcticdem.slope
    ds_optical["arcticdem_data_mask"] = ds_arcticdem.arcticdem_data_mask

    return ds_optical