Skip to content

darts_preprocessing

Data preprocessing and feature engineering for the DARTS dataset.

__version__ module-attribute

__version__ = importlib.metadata.version('darts-nextgen')

calculate_aspect

calculate_aspect(
    arcticdem_ds: xarray.Dataset,
) -> xarray.Dataset

Calculate aspect (compass direction) of the terrain surface from an ArcticDEM Dataset.

Aspect indicates the downslope direction of the maximum rate of change in elevation.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • aspect (float32): Aspect in degrees clockwise from north [0-360], or -1 for flat areas.

    • long_name: "Aspect"

    • units: "degrees"
    • description: Compass direction of slope
    • source: "ArcticDEM"
Note

Aspect values:

  • 0° or 360°: North-facing
  • 90°: East-facing
  • 180°: South-facing
  • 270°: West-facing
  • -1: Flat (no dominant direction)
Example
from darts_preprocessing import calculate_aspect

arcticdem_with_aspect = calculate_aspect(arcticdem_ds)

# Identify south-facing slopes (135-225 degrees)
south_facing = (arcticdem_with_aspect.aspect > 135) & (arcticdem_with_aspect.aspect < 225)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating aspect", printer=logger.debug)
def calculate_aspect(arcticdem_ds: xr.Dataset) -> xr.Dataset:
    """Calculate aspect (compass direction) of the terrain surface from an ArcticDEM Dataset.

    Aspect indicates the downslope direction of the maximum rate of change in elevation.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - aspect (float32): Aspect in degrees clockwise from north [0-360], or -1 for flat areas.

            - long_name: "Aspect"
            - units: "degrees"
            - description: Compass direction of slope
            - source: "ArcticDEM"

    Note:
        Aspect values:

        - 0° or 360°: North-facing
        - 90°: East-facing
        - 180°: South-facing
        - 270°: West-facing
        - -1: Flat (no dominant direction)

    Example:
        ```python
        from darts_preprocessing import calculate_aspect

        arcticdem_with_aspect = calculate_aspect(arcticdem_ds)

        # Identify south-facing slopes (135-225 degrees)
        south_facing = (arcticdem_with_aspect.aspect > 135) & (arcticdem_with_aspect.aspect < 225)
        ```

    """
    aspect_deg = aspect(arcticdem_ds.dem)
    aspect_deg.attrs = {
        "long_name": "Aspect",
        "units": "degrees",
        "description": "The compass direction that the slope faces, in degrees clockwise from north.",
        "source": "ArcticDEM",
    }
    arcticdem_ds["aspect"] = aspect_deg.compute()
    return arcticdem_ds

calculate_ctvi

calculate_ctvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate CTVI (Corrected Transformed Vegetation Index) from spectral bands.

CTVI is a corrected version of TVI that maintains the sign of the original NDVI values while applying the transformation.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing: - ndvi (float32): NDVI values (will be calculated if not present) - nir, red (float32): Required if NDVI not present

Returns:

  • xarray.DataArray

    xr.DataArray: CTVI values with attributes: - long_name: "CTVI"

Note

Formula: CTVI = (NDVI + 0.5) / |NDVI + 0.5| * sqrt(|NDVI + 0.5|)

If NDVI is already in the dataset, it will be reused to avoid recalculation.

References

Lemenkova, Polina. "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland" Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78. https://doi.org/10.2478/arls-2020-0021

Example
from darts_preprocessing import calculate_ctvi

ctvi = calculate_ctvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating CTVI", printer=logger.debug)
def calculate_ctvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate CTVI (Corrected Transformed Vegetation Index) from spectral bands.

    CTVI is a corrected version of TVI that maintains the sign of the original NDVI values
    while applying the transformation.

    Args:
        optical (xr.Dataset): Dataset containing:
            - ndvi (float32): NDVI values (will be calculated if not present)
            - nir, red (float32): Required if NDVI not present

    Returns:
        xr.DataArray: CTVI values with attributes:
            - long_name: "CTVI"

    Note:
        Formula: CTVI = (NDVI + 0.5) / |NDVI + 0.5| * sqrt(|NDVI + 0.5|)

        If NDVI is already in the dataset, it will be reused to avoid recalculation.

    References:
        Lemenkova, Polina.
        "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland"
        Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78.
        https://doi.org/10.2478/arls-2020-0021

    Example:
        ```python
        from darts_preprocessing import calculate_ctvi

        ctvi = calculate_ctvi(optical)
        ```

    """
    ndvi = optical["ndvi"] if "ndvi" in optical else calculate_ndvi(optical)
    ctvi = (ndvi + 0.5) / np.abs(ndvi + 0.5) * np.sqrt(np.abs(ndvi + 0.5))
    ctvi = ctvi.assign_attrs({"long_name": "CTVI"}).rename("ctvi")
    return ctvi

calculate_curvature

calculate_curvature(
    arcticdem_ds: xarray.Dataset,
) -> xarray.Dataset

Calculate curvature of the terrain surface from an ArcticDEM Dataset.

Curvature measures the rate of change of slope, indicating terrain convexity or concavity.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • curvature (float32): Curvature values.

    • long_name: "Curvature"

    • description: Rate of change of slope
    • source: "ArcticDEM"
Note

Curvature interpretation:

  • Positive values: Convex terrain (hills, ridges)
  • Negative values: Concave terrain (valleys, depressions)
  • Near zero: Planar terrain
Example
from darts_preprocessing import calculate_curvature

arcticdem_with_curv = calculate_curvature(arcticdem_ds)

# Identify ridges (convex areas)
ridges = arcticdem_with_curv.curvature > 0.1
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating curvature", printer=logger.debug)
def calculate_curvature(arcticdem_ds: xr.Dataset) -> xr.Dataset:
    """Calculate curvature of the terrain surface from an ArcticDEM Dataset.

    Curvature measures the rate of change of slope, indicating terrain convexity or concavity.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - curvature (float32): Curvature values.

            - long_name: "Curvature"
            - description: Rate of change of slope
            - source: "ArcticDEM"

    Note:
        Curvature interpretation:

        - Positive values: Convex terrain (hills, ridges)
        - Negative values: Concave terrain (valleys, depressions)
        - Near zero: Planar terrain

    Example:
        ```python
        from darts_preprocessing import calculate_curvature

        arcticdem_with_curv = calculate_curvature(arcticdem_ds)

        # Identify ridges (convex areas)
        ridges = arcticdem_with_curv.curvature > 0.1
        ```

    """
    curvature_da = curvature(arcticdem_ds.dem)
    curvature_da.attrs = {
        "long_name": "Curvature",
        "units": "",
        "description": "The curvature of the terrain surface.",
        "source": "ArcticDEM",
    }
    arcticdem_ds["curvature"] = curvature_da.compute()
    return arcticdem_ds

calculate_dissection_index

calculate_dissection_index(
    arcticdem_ds: xarray.Dataset, neighborhood_size: int
) -> xarray.Dataset

Calculate the Dissection Index (DI) from an ArcticDEM Dataset.

DI measures the degree to which a landscape has been cut by valleys and ravines. Values range from 0 (smooth, undissected) to 1 (highly dissected).

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

  • neighborhood_size (int) –

    Neighborhood window size for DI calculation. Can be specified as string with units (e.g., "100m" or "10px").

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • di (float32): Dissection Index [0-1].

    • long_name: "Dissection Index"

    • description: Documents neighborhood size used
    • source: "ArcticDEM"
Note

The dissection index quantifies landscape dissection by comparing elevation ranges within the neighborhood window. Higher values indicate more deeply incised terrain with greater vertical relief.

The neighborhood_size parameter is converted to pixels based on DEM resolution.

Example
from darts_preprocessing import calculate_dissection_index

# Calculate DI with 100m neighborhood
arcticdem_with_di = calculate_dissection_index(
    arcticdem_ds=arcticdem,
    neighborhood_size=100
)

# Identify highly dissected terrain
dissected = arcticdem_with_di.di > 0.5
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating Dissection Index", printer=logger.debug)
def calculate_dissection_index(arcticdem_ds: xr.Dataset, neighborhood_size: int) -> xr.Dataset:
    """Calculate the Dissection Index (DI) from an ArcticDEM Dataset.

    DI measures the degree to which a landscape has been cut by valleys and ravines.
    Values range from 0 (smooth, undissected) to 1 (highly dissected).

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model
        neighborhood_size (int): Neighborhood window size for DI calculation.
            Can be specified as string with units (e.g., "100m" or "10px").

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - di (float32): Dissection Index [0-1].

            - long_name: "Dissection Index"
            - description: Documents neighborhood size used
            - source: "ArcticDEM"

    Note:
        The dissection index quantifies landscape dissection by comparing elevation
        ranges within the neighborhood window. Higher values indicate more deeply
        incised terrain with greater vertical relief.

        The neighborhood_size parameter is converted to pixels based on DEM resolution.

    Example:
        ```python
        from darts_preprocessing import calculate_dissection_index

        # Calculate DI with 100m neighborhood
        arcticdem_with_di = calculate_dissection_index(
            arcticdem_ds=arcticdem,
            neighborhood_size=100
        )

        # Identify highly dissected terrain
        dissected = arcticdem_with_di.di > 0.5
        ```

    """
    # Get neighborhood_size in pixels
    neighborhood_size: Distance = Distance.parse(neighborhood_size, arcticdem_ds.odc.geobox.resolution.x)

    di = dissection_index(arcticdem_ds.dem, window_size=neighborhood_size.pixel)

    di.attrs = {
        "long_name": "Dissection Index",
        "units": "",
        "description": (
            f"Dissection index calculated using a {neighborhood_size} neighborhood. "
            "Values range from 0 (smooth) to 1 (most rugged)."
        ),
        "source": "ArcticDEM",
    }

    arcticdem_ds["di"] = di.compute()
    return arcticdem_ds

calculate_evi

calculate_evi(
    optical: xarray.Dataset,
    g: float = 2.5,
    c1: float = 6,
    c2: float = 7.5,
    l: float = 1,
) -> xarray.DataArray

Calculate EVI (Enhanced Vegetation Index) from spectral bands.

EVI is optimized to enhance vegetation signal with improved sensitivity in high biomass regions and improved vegetation monitoring through decoupling of canopy background signal and reducing atmospheric influences.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - nir (float32): Near-infrared reflectance [0-1] - red (float32): Red reflectance [0-1] - blue (float32): Blue reflectance [0-1]

  • g (float, default: 2.5 ) –

    Gain factor. Defaults to 2.5.

  • c1 (float, default: 6 ) –

    Aerosol resistance coefficient for red band. Defaults to 6.

  • c2 (float, default: 7.5 ) –

    Aerosol resistance coefficient for blue band. Defaults to 7.5.

  • l (float, default: 1 ) –

    Canopy background adjustment. Defaults to 1.

Returns:

  • xarray.DataArray

    xr.DataArray: EVI values with attributes: - long_name: "EVI"

Note

Formula: EVI = G * (NIR - Red) / (NIR + C1 * Red - C2 * Blue + L)

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

A Huete, K Didan, T Miura, E.P Rodriguez, X Gao, L.G Ferreira, Overview of the radiometric and biophysical performance of the MODIS vegetation indices, Remote Sensing of Environment, Volume 83, Issues 1-2, 2002, Pages 195-213, ISSN 0034-4257, https://doi.org/10.1016/S0034-4257(02)00096-2.

Example
from darts_preprocessing import calculate_evi

evi = calculate_evi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating EVI", printer=logger.debug)
def calculate_evi(optical: xr.Dataset, g: float = 2.5, c1: float = 6, c2: float = 7.5, l: float = 1) -> xr.DataArray:  # noqa: E741
    """Calculate EVI (Enhanced Vegetation Index) from spectral bands.

    EVI is optimized to enhance vegetation signal with improved sensitivity in high biomass
    regions and improved vegetation monitoring through decoupling of canopy background signal
    and reducing atmospheric influences.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - nir (float32): Near-infrared reflectance [0-1]
            - red (float32): Red reflectance [0-1]
            - blue (float32): Blue reflectance [0-1]
        g (float, optional): Gain factor. Defaults to 2.5.
        c1 (float, optional): Aerosol resistance coefficient for red band. Defaults to 6.
        c2 (float, optional): Aerosol resistance coefficient for blue band. Defaults to 7.5.
        l (float, optional): Canopy background adjustment. Defaults to 1.

    Returns:
        xr.DataArray: EVI values with attributes:
            - long_name: "EVI"

    Note:
        Formula: EVI = G * (NIR - Red) / (NIR + C1 * Red - C2 * Blue + L)

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        A Huete, K Didan, T Miura, E.P Rodriguez, X Gao, L.G Ferreira,
        Overview of the radiometric and biophysical performance of the MODIS vegetation indices,
        Remote Sensing of Environment, Volume 83, Issues 1-2, 2002, Pages 195-213, ISSN 0034-4257,
        https://doi.org/10.1016/S0034-4257(02)00096-2.

    Example:
        ```python
        from darts_preprocessing import calculate_evi

        evi = calculate_evi(optical)
        ```

    """
    nir = optical["nir"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    b = optical["blue"].clip(0, 1)
    evi = g * (nir - r) / (nir + c1 * r - c2 * b + l)
    evi = evi.assign_attrs({"long_name": "EVI"}).rename("evi")
    return evi

calculate_exg

calculate_exg(optical: xarray.Dataset) -> xarray.DataArray

Calculate EXG (Excess Green Index) from spectral bands.

EXG highlights green vegetation by emphasizing the green band relative to red and blue. Widely used for crop/weed discrimination and precision agriculture.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - green (float32): Green reflectance [0-1] - red (float32): Red reflectance [0-1] - blue (float32): Blue reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: EXG values with attributes: - long_name: "EXG"

Note

Formula: EXG = 2 * Green - Red - Blue

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

Upendar, K., Agrawal, K.N., Chandel, N.S. et al. Greenness identification using visible spectral colour indices for site specific weed management. Plant Physiol. Rep. 26, 179-187 (2021). https://doi.org/10.1007/s40502-020-00562-0

Example
from darts_preprocessing import calculate_exg

exg = calculate_exg(optical)

# Threshold for vegetation detection
vegetation = exg > 0
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating EXG", printer=logger.debug)
def calculate_exg(optical: xr.Dataset) -> xr.DataArray:
    """Calculate EXG (Excess Green Index) from spectral bands.

    EXG highlights green vegetation by emphasizing the green band relative to red and blue.
    Widely used for crop/weed discrimination and precision agriculture.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - green (float32): Green reflectance [0-1]
            - red (float32): Red reflectance [0-1]
            - blue (float32): Blue reflectance [0-1]

    Returns:
        xr.DataArray: EXG values with attributes:
            - long_name: "EXG"

    Note:
        Formula: EXG = 2 * Green - Red - Blue

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        Upendar, K., Agrawal, K.N., Chandel, N.S. et al.
        Greenness identification using visible spectral colour indices for site specific weed management.
        Plant Physiol. Rep. 26, 179-187 (2021).
        https://doi.org/10.1007/s40502-020-00562-0

    Example:
        ```python
        from darts_preprocessing import calculate_exg

        exg = calculate_exg(optical)

        # Threshold for vegetation detection
        vegetation = exg > 0
        ```

    """
    g = optical["green"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    b = optical["blue"].clip(0, 1)
    exg = 2 * g - r - b
    exg = exg.assign_attrs({"long_name": "EXG"}).rename("exg")
    return exg

calculate_gli

calculate_gli(optical: xarray.Dataset) -> xarray.DataArray

Calculate GLI (Green Leaf Index) from spectral bands.

GLI emphasizes green reflectance for vegetation detection using only visible bands. Suitable for RGB sensors and aerial imagery.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - green (float32): Green reflectance - red (float32): Red reflectance - blue (float32): Blue reflectance

Returns:

  • xarray.DataArray

    xr.DataArray: GLI values with attributes: - long_name: "GLI"

Note

Formula: GLI = (2 * Green - Red - Blue) / (2 * Green + Red + Blue)

References

Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019. The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images. International Journal of Technology. Volume 10(7), pp. 1385-1394 https://doi.org/10.14716/ijtech.v10i7.3275

Example
from darts_preprocessing import calculate_gli

gli = calculate_gli(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating GLI", printer=logger.debug)
def calculate_gli(optical: xr.Dataset) -> xr.DataArray:
    """Calculate GLI (Green Leaf Index) from spectral bands.

    GLI emphasizes green reflectance for vegetation detection using only visible bands.
    Suitable for RGB sensors and aerial imagery.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - green (float32): Green reflectance
            - red (float32): Red reflectance
            - blue (float32): Blue reflectance

    Returns:
        xr.DataArray: GLI values with attributes:
            - long_name: "GLI"

    Note:
        Formula: GLI = (2 * Green - Red - Blue) / (2 * Green + Red + Blue)

    References:
        Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019.
        The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images.
        International Journal of Technology. Volume 10(7), pp. 1385-1394
        https://doi.org/10.14716/ijtech.v10i7.3275

    Example:
        ```python
        from darts_preprocessing import calculate_gli

        gli = calculate_gli(optical)
        ```

    """
    g = optical["green"]
    r = optical["red"]
    b = optical["blue"]
    gli = (2 * g - r - b) / (2 * g + r + b)
    gli = gli.assign_attrs({"long_name": "GLI"}).rename("gli")
    return gli

calculate_gndvi

calculate_gndvi(
    optical: xarray.Dataset,
) -> xarray.DataArray

Calculate GNDVI (Green Normalized Difference Vegetation Index) from spectral bands.

GNDVI is similar to NDVI but uses the green band instead of red, making it more sensitive to chlorophyll content and useful for mid to late season vegetation monitoring.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - nir (float32): Near-infrared reflectance [0-1] - green (float32): Green reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: GNDVI values with attributes: - long_name: "GNDVI" - Values clipped to [-1, 1] range

Note

Formula: GNDVI = (NIR - Green) / (NIR + Green)

Input bands are clipped to [0, 1] to avoid numerical instabilities.

Example
from darts_preprocessing import calculate_gndvi

gndvi = calculate_gndvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating GNDVI", printer=logger.debug)
def calculate_gndvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate GNDVI (Green Normalized Difference Vegetation Index) from spectral bands.

    GNDVI is similar to NDVI but uses the green band instead of red, making it more sensitive
    to chlorophyll content and useful for mid to late season vegetation monitoring.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - nir (float32): Near-infrared reflectance [0-1]
            - green (float32): Green reflectance [0-1]

    Returns:
        xr.DataArray: GNDVI values with attributes:
            - long_name: "GNDVI"
            - Values clipped to [-1, 1] range

    Note:
        Formula: GNDVI = (NIR - Green) / (NIR + Green)

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    Example:
        ```python
        from darts_preprocessing import calculate_gndvi

        gndvi = calculate_gndvi(optical)
        ```

    """
    nir = optical["nir"].clip(0, 1)
    g = optical["green"].clip(0, 1)
    gndvi = (nir - g) / (nir + g)
    gndvi = gndvi.clip(-1, 1)
    gndvi = gndvi.assign_attrs({"long_name": "GNDVI"}).rename("gndvi")
    return gndvi

calculate_grvi

calculate_grvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate GRVI (Green Red Vegetation Index) from spectral bands.

GRVI uses visible bands to detect vegetation, useful for high-resolution imagery where NIR may not be available or for specific vegetation discrimination tasks.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - green (float32): Green reflectance [0-1] - red (float32): Red reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: GRVI values with attributes: - long_name: "GRVI"

Note

Formula: GRVI = (Green - Red) / (Green + Red)

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019. The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images. International Journal of Technology. Volume 10(7), pp. 1385-1394 https://doi.org/10.14716/ijtech.v10i7.3275

Example
from darts_preprocessing import calculate_grvi

grvi = calculate_grvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating GRVI", printer=logger.debug)
def calculate_grvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate GRVI (Green Red Vegetation Index) from spectral bands.

    GRVI uses visible bands to detect vegetation, useful for high-resolution imagery
    where NIR may not be available or for specific vegetation discrimination tasks.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - green (float32): Green reflectance [0-1]
            - red (float32): Red reflectance [0-1]

    Returns:
        xr.DataArray: GRVI values with attributes:
            - long_name: "GRVI"

    Note:
        Formula: GRVI = (Green - Red) / (Green + Red)

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019.
        The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images.
        International Journal of Technology. Volume 10(7), pp. 1385-1394
        https://doi.org/10.14716/ijtech.v10i7.3275

    Example:
        ```python
        from darts_preprocessing import calculate_grvi

        grvi = calculate_grvi(optical)
        ```

    """
    g = optical["green"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    grvi = (g - r) / (g + r)
    grvi = grvi.assign_attrs({"long_name": "GRVI"}).rename("grvi")
    return grvi

calculate_hillshade

calculate_hillshade(
    arcticdem_ds: xarray.Dataset,
    azimuth: int = 225,
    angle_altitude: int = 25,
) -> xarray.Dataset

Calculate hillshade of the terrain surface from an ArcticDEM Dataset.

Hillshade simulates illumination of terrain from a specified sun position, useful for visualization and terrain analysis.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

  • azimuth (int, default: 225 ) –

    Light source azimuth in degrees clockwise from north [0-360]. Defaults to 225 (southwest).

  • angle_altitude (int, default: 25 ) –

    Light source altitude angle in degrees above horizon [0-90]. Defaults to 25.

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • hillshade (float32): Illumination values [0-255], where 0 is shadow and 255 is fully lit.

    • long_name: "Hillshade"

    • description: Documents azimuth and angle_altitude used
    • source: "ArcticDEM"
Note

Common azimuth/altitude combinations:

  • 315°/45°: Classic northwest illumination (default for many GIS applications)
  • 225°/25°: Southwest with low sun (better for visualizing subtle features)

The hillshade calculation accounts for both slope and aspect of the terrain.

Example
from darts_preprocessing import calculate_hillshade

# Default southwest illumination
arcticdem_with_hs = calculate_hillshade(arcticdem_ds)

# Custom sun position
arcticdem_custom = calculate_hillshade(
    arcticdem_ds,
    azimuth=315,
    angle_altitude=45
)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch.f("Calculating hillshade", printer=logger.debug, print_kwargs=["azimuth", "angle_altitude"])
def calculate_hillshade(arcticdem_ds: xr.Dataset, azimuth: int = 225, angle_altitude: int = 25) -> xr.Dataset:
    """Calculate hillshade of the terrain surface from an ArcticDEM Dataset.

    Hillshade simulates illumination of terrain from a specified sun position, useful
    for visualization and terrain analysis.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model
        azimuth (int, optional): Light source azimuth in degrees clockwise from north [0-360].
            Defaults to 225 (southwest).
        angle_altitude (int, optional): Light source altitude angle in degrees above horizon [0-90].
            Defaults to 25.

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - hillshade (float32): Illumination values [0-255], where 0 is shadow and 255 is fully lit.

            - long_name: "Hillshade"
            - description: Documents azimuth and angle_altitude used
            - source: "ArcticDEM"

    Note:
        Common azimuth/altitude combinations:

        - 315°/45°: Classic northwest illumination (default for many GIS applications)
        - 225°/25°: Southwest with low sun (better for visualizing subtle features)

        The hillshade calculation accounts for both slope and aspect of the terrain.

    Example:
        ```python
        from darts_preprocessing import calculate_hillshade

        # Default southwest illumination
        arcticdem_with_hs = calculate_hillshade(arcticdem_ds)

        # Custom sun position
        arcticdem_custom = calculate_hillshade(
            arcticdem_ds,
            azimuth=315,
            angle_altitude=45
        )
        ```

    """
    hillshade_da = hillshade(arcticdem_ds.dem, azimuth=azimuth, angle_altitude=angle_altitude)
    hillshade_da.attrs = {
        "long_name": "Hillshade",
        "units": "",
        "description": f"The hillshade based on azimuth {azimuth} and angle_altitude {angle_altitude}.",
        "source": "ArcticDEM",
    }
    arcticdem_ds["hillshade"] = hillshade_da.compute()
    return arcticdem_ds

calculate_ndvi

calculate_ndvi(optical: xarray.Dataset) -> xarray.Dataset

Calculate NDVI (Normalized Difference Vegetation Index) from spectral bands.

NDVI is a widely-used vegetation index that indicates photosynthetic activity and vegetation health. Values range from -1 to 1, with higher values indicating denser, healthier vegetation.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - nir (float32): Near-infrared reflectance [0-1] - red (float32): Red reflectance [0-1]

Returns:

  • xarray.Dataset

    xr.DataArray: NDVI values with attributes: - long_name: "NDVI"

Note

Formula: NDVI = (NIR - Red) / (NIR + Red)

Input bands are clipped to [0, 1] before calculation to avoid numerical instabilities from negative reflectance values or sensor artifacts. The final result is also clipped to ensure values remain in the valid [-1, 1] range.

Example

Calculate NDVI from optical data:

from darts_preprocessing import calculate_ndvi

# optical contains 'nir' and 'red' bands
ndvi = calculate_ndvi(optical)

# Mask vegetation
vegetation_mask = ndvi > 0.3
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating NDVI", printer=logger.debug)
def calculate_ndvi(optical: xr.Dataset) -> xr.Dataset:
    """Calculate NDVI (Normalized Difference Vegetation Index) from spectral bands.

    NDVI is a widely-used vegetation index that indicates photosynthetic activity and
    vegetation health. Values range from -1 to 1, with higher values indicating denser,
    healthier vegetation.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - nir (float32): Near-infrared reflectance [0-1]
            - red (float32): Red reflectance [0-1]

    Returns:
        xr.DataArray: NDVI values with attributes:
            - long_name: "NDVI"

    Note:
        Formula: NDVI = (NIR - Red) / (NIR + Red)

        Input bands are clipped to [0, 1] before calculation to avoid numerical instabilities
        from negative reflectance values or sensor artifacts. The final result is also clipped
        to ensure values remain in the valid [-1, 1] range.

    Example:
        Calculate NDVI from optical data:

        ```python
        from darts_preprocessing import calculate_ndvi

        # optical contains 'nir' and 'red' bands
        ndvi = calculate_ndvi(optical)

        # Mask vegetation
        vegetation_mask = ndvi > 0.3
        ```

    """
    nir = optical["nir"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    ndvi = (nir - r) / (nir + r)
    ndvi = ndvi.clip(-1, 1)
    ndvi = ndvi.assign_attrs({"long_name": "NDVI"}).rename("ndvi")
    return ndvi

calculate_nrvi

calculate_nrvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate NRVI (Normalized Ratio Vegetation Index) from spectral bands.

NRVI normalizes RVI to a range similar to NDVI, making it more comparable across different vegetation densities.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing: - rvi (float32): RVI values (will be calculated if not present) - nir, red (float32): Required if RVI not present

Returns:

  • xarray.DataArray

    xr.DataArray: NRVI values with attributes: - long_name: "NRVI"

Note

Formula: NRVI = (RVI - 1) / (RVI + 1) where RVI = Red / NIR

If RVI is already in the dataset, it will be reused to avoid recalculation.

Example
from darts_preprocessing import calculate_nrvi

nrvi = calculate_nrvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating NRVI", printer=logger.debug)
def calculate_nrvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate NRVI (Normalized Ratio Vegetation Index) from spectral bands.

    NRVI normalizes RVI to a range similar to NDVI, making it more comparable across
    different vegetation densities.

    Args:
        optical (xr.Dataset): Dataset containing:
            - rvi (float32): RVI values (will be calculated if not present)
            - nir, red (float32): Required if RVI not present

    Returns:
        xr.DataArray: NRVI values with attributes:
            - long_name: "NRVI"

    Note:
        Formula: NRVI = (RVI - 1) / (RVI + 1)
        where RVI = Red / NIR

        If RVI is already in the dataset, it will be reused to avoid recalculation.

    Example:
        ```python
        from darts_preprocessing import calculate_nrvi

        nrvi = calculate_nrvi(optical)
        ```

    """
    rvi = optical["rvi"] if "rvi" in optical else calculate_rvi(optical)
    nrvi = (rvi - 1) / (rvi + 1)
    nrvi = nrvi.assign_attrs({"long_name": "NRVI"}).rename("nrvi")
    return nrvi

calculate_rvi

calculate_rvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate RVI (Ratio Vegetation Index) from spectral bands.

RVI is a simple ratio index sensitive to vegetation amount and biomass. Values typically range from 0 to over 30 for dense vegetation.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - nir (float32): Near-infrared reflectance [0-1] - red (float32): Red reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: RVI values with attributes: - long_name: "RVI"

Note

Formula: RVI = Red / NIR

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

Lemenkova, Polina. "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland" Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78. https://doi.org/10.2478/arls-2020-0021

Example
from darts_preprocessing import calculate_rvi

rvi = calculate_rvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating RVI", printer=logger.debug)
def calculate_rvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate RVI (Ratio Vegetation Index) from spectral bands.

    RVI is a simple ratio index sensitive to vegetation amount and biomass. Values typically
    range from 0 to over 30 for dense vegetation.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - nir (float32): Near-infrared reflectance [0-1]
            - red (float32): Red reflectance [0-1]

    Returns:
        xr.DataArray: RVI values with attributes:
            - long_name: "RVI"

    Note:
        Formula: RVI = Red / NIR

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        Lemenkova, Polina.
        "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland"
        Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78.
        https://doi.org/10.2478/arls-2020-0021

    Example:
        ```python
        from darts_preprocessing import calculate_rvi

        rvi = calculate_rvi(optical)
        ```

    """
    nir = optical["nir"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    rvi = r / nir
    rvi = rvi.assign_attrs({"long_name": "RVI"}).rename("rvi")
    return rvi

calculate_savi

calculate_savi(
    optical: xarray.Dataset, s: float = 0.5
) -> xarray.DataArray

Calculate SAVI (Soil Adjusted Vegetation Index) from spectral bands.

SAVI minimizes soil brightness influences using a soil-brightness correction factor. Useful in areas with sparse vegetation or exposed soil.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing: - ndvi (float32): NDVI values (will be calculated if not present) - nir, red (float32): Required if NDVI not present

  • s (float, default: 0.5 ) –

    Soil adjustment factor. Common values: - 0.5: moderate vegetation cover (default) - 0.25: high vegetation cover - 1.0: low vegetation cover

Returns:

  • xarray.DataArray

    xr.DataArray: SAVI values with attributes: - long_name: "SAVI"

Note

Formula: SAVI = NDVI * (1 + s)

References

Lemenkova, Polina. "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland" Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78. https://doi.org/10.2478/arls-2020-0021

Example
from darts_preprocessing import calculate_savi

# For sparse vegetation
savi = calculate_savi(optical, s=1.0)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating SAVI", printer=logger.debug)
def calculate_savi(optical: xr.Dataset, s: float = 0.5) -> xr.DataArray:
    """Calculate SAVI (Soil Adjusted Vegetation Index) from spectral bands.

    SAVI minimizes soil brightness influences using a soil-brightness correction factor.
    Useful in areas with sparse vegetation or exposed soil.

    Args:
        optical (xr.Dataset): Dataset containing:
            - ndvi (float32): NDVI values (will be calculated if not present)
            - nir, red (float32): Required if NDVI not present
        s (float, optional): Soil adjustment factor. Common values:
            - 0.5: moderate vegetation cover (default)
            - 0.25: high vegetation cover
            - 1.0: low vegetation cover

    Returns:
        xr.DataArray: SAVI values with attributes:
            - long_name: "SAVI"

    Note:
        Formula: SAVI = NDVI * (1 + s)

    References:
        Lemenkova, Polina.
        "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland"
        Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78.
        https://doi.org/10.2478/arls-2020-0021

    Example:
        ```python
        from darts_preprocessing import calculate_savi

        # For sparse vegetation
        savi = calculate_savi(optical, s=1.0)
        ```

    """
    ndvi = optical["ndvi"] if "ndvi" in optical else calculate_ndvi(optical)
    savi = ndvi * (1 + s)
    savi = savi.assign_attrs({"long_name": "SAVI"}).rename("savi")
    return savi

calculate_slope

calculate_slope(
    arcticdem_ds: xarray.Dataset,
) -> xarray.Dataset

Calculate slope of the terrain surface from an ArcticDEM Dataset.

Slope represents the rate of change of elevation, indicating terrain steepness.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • slope (float32): Slope in degrees [0-90].

    • long_name: "Slope"

    • units: "degrees"
    • source: "ArcticDEM"
Note

Slope is calculated using finite difference methods on the DEM. Values approaching 90° indicate near-vertical terrain.

Example
from darts_preprocessing import calculate_slope

arcticdem_with_slope = calculate_slope(arcticdem_ds)

# Mask steep terrain
steep_areas = arcticdem_with_slope.slope > 30
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating slope", printer=logger.debug)
def calculate_slope(arcticdem_ds: xr.Dataset) -> xr.Dataset:
    """Calculate slope of the terrain surface from an ArcticDEM Dataset.

    Slope represents the rate of change of elevation, indicating terrain steepness.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - slope (float32): Slope in degrees [0-90].

            - long_name: "Slope"
            - units: "degrees"
            - source: "ArcticDEM"

    Note:
        Slope is calculated using finite difference methods on the DEM.
        Values approaching 90° indicate near-vertical terrain.

    Example:
        ```python
        from darts_preprocessing import calculate_slope

        arcticdem_with_slope = calculate_slope(arcticdem_ds)

        # Mask steep terrain
        steep_areas = arcticdem_with_slope.slope > 30
        ```

    """
    slope_deg = slope(arcticdem_ds.dem)
    slope_deg.attrs = {
        "long_name": "Slope",
        "units": "degrees",
        "description": "The slope of the terrain surface in degrees.",
        "source": "ArcticDEM",
    }
    arcticdem_ds["slope"] = slope_deg.compute()
    return arcticdem_ds

calculate_spyndex

calculate_spyndex(
    ds_optical: xarray.Dataset,
    index: str,
    **kwargs: dict[str, typing.Any],
) -> xarray.DataArray

Compute a spectral index using the spyndex library.

This wrapper provides access to 200+ spectral indices from the spyndex library with automatic band mapping and parameter handling.

Parameters:

  • ds_optical (xarray.Dataset) –

    Dataset containing spectral bands. Band names should match spyndex common names (e.g., 'red', 'nir', 'blue', 'green').

  • index (str) –

    Name of the spectral index to compute. Run spyndex.indices to see all available indices (e.g., 'NDVI', 'EVI', 'SAVI').

  • **kwargs (dict[str, typing.Any], default: {} ) –

    Additional parameters or band overrides: - Band values: Override bands from ds_optical with scalar or array values (e.g., red=0.2) - Constants: Override default values for index-specific constants (e.g., L=0.5 for SAVI)

Returns:

  • xarray.DataArray

    xr.DataArray: Computed spectral index with attributes: - source: "spyndex" - long_name: Full name of the index - reference: Citation for the index - formula: Mathematical formula - author: Index contributor

Raises:

  • ValueError

    If a required band is missing from both ds_optical and kwargs.

  • ValueError

    If all bands are provided as scalar values.

Note

Band resolution priority:

  1. Bands in ds_optical (with common_name matching spyndex.bands)
  2. Values in kwargs (override ds_optical bands)
  3. Default values for constants (from spyndex.constants)

All optical bands are automatically clipped to [0, 1] before calculation.

At least one band must come from ds_optical as a DataArray (not all scalars).

Example

Compute various indices with spyndex:

from darts_preprocessing import calculate_spyndex
import spyndex

# List all available indices
print(list(spyndex.indices.keys()))

# Basic NDVI
ndvi = calculate_spyndex(ds_optical, "NDVI")

# SAVI with custom soil adjustment factor
savi = calculate_spyndex(ds_optical, "SAVI", L=0.5)

# EVI with custom parameters
evi = calculate_spyndex(ds_optical, "EVI", g=2.5, C1=6, C2=7.5, L=1)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/spyndex.py
@stopwatch.f("Computing spectral index with spyndex", printer=logger.debug, print_kwargs=["index"])
def calculate_spyndex(ds_optical: xr.Dataset, index: str, **kwargs: dict[str, Any]) -> xr.DataArray:
    """Compute a spectral index using the spyndex library.

    This wrapper provides access to 200+ spectral indices from the spyndex library with
    automatic band mapping and parameter handling.

    Args:
        ds_optical (xr.Dataset): Dataset containing spectral bands. Band names should match
            spyndex common names (e.g., 'red', 'nir', 'blue', 'green').
        index (str): Name of the spectral index to compute. Run `spyndex.indices` to see
            all available indices (e.g., 'NDVI', 'EVI', 'SAVI').
        **kwargs: Additional parameters or band overrides:
            - Band values: Override bands from ds_optical with scalar or array values (e.g., red=0.2)
            - Constants: Override default values for index-specific constants (e.g., L=0.5 for SAVI)

    Returns:
        xr.DataArray: Computed spectral index with attributes:
            - source: "spyndex"
            - long_name: Full name of the index
            - reference: Citation for the index
            - formula: Mathematical formula
            - author: Index contributor

    Raises:
        ValueError: If a required band is missing from both ds_optical and kwargs.
        ValueError: If all bands are provided as scalar values.

    Note:
        Band resolution priority:

        1. Bands in ds_optical (with common_name matching spyndex.bands)
        2. Values in kwargs (override ds_optical bands)
        3. Default values for constants (from spyndex.constants)

        All optical bands are automatically clipped to [0, 1] before calculation.

        At least one band must come from ds_optical as a DataArray (not all scalars).

    Example:
        Compute various indices with spyndex:

        ```python
        from darts_preprocessing import calculate_spyndex
        import spyndex

        # List all available indices
        print(list(spyndex.indices.keys()))

        # Basic NDVI
        ndvi = calculate_spyndex(ds_optical, "NDVI")

        # SAVI with custom soil adjustment factor
        savi = calculate_spyndex(ds_optical, "SAVI", L=0.5)

        # EVI with custom parameters
        evi = calculate_spyndex(ds_optical, "EVI", g=2.5, C1=6, C2=7.5, L=1)
        ```

    """
    index: spyndex.axioms.SpectralIndex = spyndex.indices[index]

    params = {}
    atleast_one_dataarray = False
    for band in index.bands:
        is_constant = band in spyndex.constants
        is_in_kwargs = band in kwargs

        is_in_optical = False
        if not is_constant:
            spyndex_band: spyndex.axioms.Band = spyndex.bands.get(band)
            is_in_optical = spyndex_band is not None and spyndex_band.common_name in ds_optical

        # Case 4: band is missing -> error
        if not (is_constant or is_in_kwargs or is_in_optical):
            raise ValueError(f"Band '{band}' is required for index '{index.short_name}' but not provided in {kwargs=}.")
        # Case 3: band is in kwargs
        if is_in_kwargs:
            params[band] = kwargs[band]
            continue
        # Case 2: band is a constant
        if is_constant:
            constant: spyndex.axioms.Constant = spyndex.constants[band]
            params[band] = constant.default
            continue
        # Case 1: band is in optical
        params[band] = ds_optical[spyndex_band.common_name].clip(0, 1)
        atleast_one_dataarray = True

    if not atleast_one_dataarray:
        raise ValueError(f"At least one band must be a DataArray, got {params=}. Did you pass all bands as scalars?")

    idx = index.compute(params)
    assert isinstance(idx, xr.DataArray)

    idx = idx.assign_attrs(
        {
            "source": "spyndex",
            "long_name": index.long_name,
            "reference": index.reference,
            "formula": index.formula,
            "author": index.contributor,
        }
    ).rename(index.short_name.lower())
    return idx

calculate_terrain_ruggedness_index

calculate_terrain_ruggedness_index(
    arcticdem_ds: xarray.Dataset, neighborhood_size: int
) -> xarray.Dataset

Calculate the Terrain Ruggedness Index (TRI) from an ArcticDEM Dataset.

TRI quantifies topographic heterogeneity by measuring elevation differences between a cell and its surrounding cells. Higher values indicate more rugged terrain.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model

  • neighborhood_size (int) –

    Neighborhood window size for TRI calculation. Can be specified as string with units (e.g., "100m" or "10px").

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • tri (float32): Terrain Ruggedness Index in meters.

    • long_name: "Terrain Ruggedness Index"

    • units: "m"
    • description: Documents kernel size used
    • source: "ArcticDEM"
Note

TRI methodology from Riley et al (1999):

  1. Measures elevation difference from center cell to 8 surrounding cells
  2. Squares and averages these differences
  3. Takes square root for final TRI value

The neighborhood_size parameter controls the kernel size. A square kernel is used, with the actual size rounded to the nearest pixel based on DEM resolution.

References

Riley, S.J., DeGloria, S.D., Elliot, R., 1999. A Terrain Ruggedness Index That Quantifies Topographic Heterogeneity. Intermountain Journal of Sciences, vol. 5, No. 1-4, pp. 23-27.

Example
from darts_preprocessing import calculate_terrain_ruggedness_index

# Calculate TRI with 100m neighborhood
arcticdem_with_tri = calculate_terrain_ruggedness_index(
    arcticdem_ds=arcticdem,
    neighborhood_size=100
)

# Identify highly rugged terrain
rugged = arcticdem_with_tri.tri > 10
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating TRI", printer=logger.debug)
def calculate_terrain_ruggedness_index(arcticdem_ds: xr.Dataset, neighborhood_size: int) -> xr.Dataset:
    """Calculate the Terrain Ruggedness Index (TRI) from an ArcticDEM Dataset.

    TRI quantifies topographic heterogeneity by measuring elevation differences between
    a cell and its surrounding cells. Higher values indicate more rugged terrain.

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model
        neighborhood_size (int): Neighborhood window size for TRI calculation.
            Can be specified as string with units (e.g., "100m" or "10px").

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - tri (float32): Terrain Ruggedness Index in meters.

            - long_name: "Terrain Ruggedness Index"
            - units: "m"
            - description: Documents kernel size used
            - source: "ArcticDEM"

    Note:
        TRI methodology from Riley et al (1999):

        1. Measures elevation difference from center cell to 8 surrounding cells
        2. Squares and averages these differences
        3. Takes square root for final TRI value

        The neighborhood_size parameter controls the kernel size. A square kernel is used,
        with the actual size rounded to the nearest pixel based on DEM resolution.

    References:
        Riley, S.J., DeGloria, S.D., Elliot, R., 1999.
        A Terrain Ruggedness Index That Quantifies Topographic Heterogeneity.
        Intermountain Journal of Sciences, vol. 5, No. 1-4, pp. 23-27.

    Example:
        ```python
        from darts_preprocessing import calculate_terrain_ruggedness_index

        # Calculate TRI with 100m neighborhood
        arcticdem_with_tri = calculate_terrain_ruggedness_index(
            arcticdem_ds=arcticdem,
            neighborhood_size=100
        )

        # Identify highly rugged terrain
        rugged = arcticdem_with_tri.tri > 10
        ```

    """
    cellsize_x, _cellsize_y = convolution.calc_cellsize(arcticdem_ds.dem)

    neighborhood_size: Distance = Distance.parse(neighborhood_size, cellsize_x)

    kernel = np.ones((neighborhood_size.pixel, neighborhood_size.pixel), dtype=float)
    kernel[neighborhood_size.pixel // 2, neighborhood_size.pixel // 2] = 0  # Set the center cell to 0
    kernel = convolution.custom_kernel(kernel)
    logger.debug(f"Calculating Terrain Ruggedness Index with square kernel of radius {neighborhood_size} cells.")

    # Change dtype of kernel to float32 since we don't need the precision and f32 is faster
    kernel = kernel.astype("float32")

    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        kernel = cp.asarray(kernel)

    # Kernel compute of TRI as described here:
    # https://sites.utexas.edu/utarima/files/2024/02/terrain_roughness_index.pdf
    dem_squared = arcticdem_ds.dem**2
    focal_sum = convolution.convolution_2d(arcticdem_ds.dem, kernel)
    focal_sum_squared = convolution.convolution_2d(dem_squared, kernel)
    tri = np.sqrt((kernel.size - 1) * dem_squared - 2 * arcticdem_ds.dem * focal_sum + focal_sum_squared)

    tri.attrs = {
        "long_name": "Terrain Ruggedness Index",
        "units": "m",
        "description": (
            "The difference between the elevation of a cell and the mean elevation of the surrounding"
            f" cells within a square kernel of radius {neighborhood_size} cells."
        ),
        "source": "ArcticDEM",
    }

    arcticdem_ds["tri"] = tri.compute()
    return arcticdem_ds

calculate_tgi

calculate_tgi(optical: xarray.Dataset) -> xarray.DataArray

Calculate TGI (Triangular Greenness Index) from spectral bands.

TGI is sensitive to chlorophyll content and can estimate leaf area index without calibration. Particularly useful for crop monitoring.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - red (float32): Red reflectance [0-1] - green (float32): Green reflectance [0-1] - blue (float32): Blue reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: TGI values with attributes: - long_name: "TGI"

Note

Formula: TGI = -0.5 * [190 * (Red - Green) - 120 * (Red - Blue)]

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

E. Raymond Hunt, Paul C. Doraiswamy, James E. McMurtrey, Craig S.T. Daughtry, Eileen M. Perry, Bakhyt Akhmedov, A visible band index for remote sensing leaf chlorophyll content at the canopy scale, International Journal of Applied Earth Observation and Geoinformation, Volume 21, 2013, Pages 103-112, ISSN 1569-8432, https://doi.org/10.1016/j.jag.2012.07.020.

Example
from darts_preprocessing import calculate_tgi

tgi = calculate_tgi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating TGI", printer=logger.debug)
def calculate_tgi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate TGI (Triangular Greenness Index) from spectral bands.

    TGI is sensitive to chlorophyll content and can estimate leaf area index without
    calibration. Particularly useful for crop monitoring.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - red (float32): Red reflectance [0-1]
            - green (float32): Green reflectance [0-1]
            - blue (float32): Blue reflectance [0-1]

    Returns:
        xr.DataArray: TGI values with attributes:
            - long_name: "TGI"

    Note:
        Formula: TGI = -0.5 * [190 * (Red - Green) - 120 * (Red - Blue)]

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        E. Raymond Hunt, Paul C. Doraiswamy, James E. McMurtrey, Craig S.T. Daughtry, Eileen M. Perry, Bakhyt Akhmedov,
        A visible band index for remote sensing leaf chlorophyll content at the canopy scale,
        International Journal of Applied Earth Observation and Geoinformation,
        Volume 21, 2013, Pages 103-112, ISSN 1569-8432,
        https://doi.org/10.1016/j.jag.2012.07.020.

    Example:
        ```python
        from darts_preprocessing import calculate_tgi

        tgi = calculate_tgi(optical)
        ```

    """
    r = optical["red"].clip(0, 1)
    g = optical["green"].clip(0, 1)
    b = optical["blue"].clip(0, 1)
    tgi = -0.5 * (190 * (r - g) - 120 * (r - b))
    tgi = tgi.assign_attrs({"long_name": "TGI"}).rename("tgi")
    return tgi

calculate_topographic_position_index

calculate_topographic_position_index(
    arcticdem_ds: xarray.Dataset,
    outer_radius: int,
    inner_radius: int,
) -> xarray.Dataset

Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

TPI measures the relative topographic position of a point by comparing its elevation to the mean elevation of the surrounding neighborhood. Positive values indicate higher positions (ridges), negative values indicate lower positions (valleys).

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    The ArcticDEM Dataset containing the 'dem' variable (float32).

  • outer_radius (int) –

    The outer radius of the neighborhood in meters. Can also be specified as string with units (e.g., "100m" or "10px").

  • inner_radius (int) –

    The inner radius of the annulus kernel in meters. If > 0, creates an annulus (ring) instead of a circle. Set to 0 for a circular kernel. Can also be specified as string with units (e.g., "50m" or "5px").

Returns:

  • xarray.Dataset

    xr.Dataset: The input Dataset with a new data variable added:

  • xarray.Dataset
    • tpi (float32): Topographic Position Index values.

    • long_name: "Topographic Position Index (TPI)"

    • description: Details about the kernel used
Note

Kernel shape combinations:

  • inner_radius=0: Circular kernel comparing each cell to all neighbors within outer_radius
  • inner_radius>0: Annulus kernel comparing each cell to neighbors in a ring between inner_radius and outer_radius. Useful for multi-scale terrain analysis.

The actual radii used are rounded to the nearest pixel based on the DEM resolution.

Example

Calculate TPI with circular and annulus kernels:

from darts_preprocessing import calculate_topographic_position_index

# Circular kernel (100m radius)
arcticdem_with_tpi = calculate_topographic_position_index(
    arcticdem_ds=arcticdem,
    outer_radius=100,
    inner_radius=0
)

# Annulus kernel (50-100m ring)
arcticdem_multi_scale = calculate_topographic_position_index(
    arcticdem_ds=arcticdem,
    outer_radius=100,
    inner_radius=50
)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch.f("Calculating TPI", printer=logger.debug, print_kwargs=["outer_radius", "inner_radius"])
def calculate_topographic_position_index(arcticdem_ds: xr.Dataset, outer_radius: int, inner_radius: int) -> xr.Dataset:
    """Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

    TPI measures the relative topographic position of a point by comparing its elevation to
    the mean elevation of the surrounding neighborhood. Positive values indicate higher
    positions (ridges), negative values indicate lower positions (valleys).

    Args:
        arcticdem_ds (xr.Dataset): The ArcticDEM Dataset containing the 'dem' variable (float32).
        outer_radius (int): The outer radius of the neighborhood in meters.
            Can also be specified as string with units (e.g., "100m" or "10px").
        inner_radius (int): The inner radius of the annulus kernel in meters.
            If > 0, creates an annulus (ring) instead of a circle. Set to 0 for a circular kernel.
            Can also be specified as string with units (e.g., "50m" or "5px").

    Returns:
        xr.Dataset: The input Dataset with a new data variable added:

        - tpi (float32): Topographic Position Index values.

            - long_name: "Topographic Position Index (TPI)"
            - description: Details about the kernel used

    Note:
        Kernel shape combinations:

        - inner_radius=0: Circular kernel comparing each cell to all neighbors within outer_radius
        - inner_radius>0: Annulus kernel comparing each cell to neighbors in a ring between
          inner_radius and outer_radius. Useful for multi-scale terrain analysis.

        The actual radii used are rounded to the nearest pixel based on the DEM resolution.

    Example:
        Calculate TPI with circular and annulus kernels:

        ```python
        from darts_preprocessing import calculate_topographic_position_index

        # Circular kernel (100m radius)
        arcticdem_with_tpi = calculate_topographic_position_index(
            arcticdem_ds=arcticdem,
            outer_radius=100,
            inner_radius=0
        )

        # Annulus kernel (50-100m ring)
        arcticdem_multi_scale = calculate_topographic_position_index(
            arcticdem_ds=arcticdem,
            outer_radius=100,
            inner_radius=50
        )
        ```

    """
    cellsize_x, cellsize_y = convolution.calc_cellsize(arcticdem_ds.dem)  # Should be equal to the resolution of the DEM
    # Use an annulus kernel if inner_radius is greater than 0
    outer_radius: Distance = Distance.parse(outer_radius, cellsize_x)
    if inner_radius > 0:
        inner_radius: Distance = Distance.parse(inner_radius, cellsize_x)
        kernel = convolution.annulus_kernel(cellsize_x, cellsize_y, outer_radius.meter, inner_radius.meter)
        attr_cell_description = (
            f"within a ring at a distance of {inner_radius}-{outer_radius} cells away from the focal cell."
        )
        logger.debug(
            f"Calculating Topographic Position Index with annulus kernel of {inner_radius}-{outer_radius} cells."
        )
    else:
        kernel = convolution.circle_kernel(cellsize_x, cellsize_y, outer_radius.meter)
        attr_cell_description = f"within a circle at a distance of {outer_radius} cells away from the focal cell."
        logger.debug(f"Calculating Topographic Position Index with circle kernel of {outer_radius} cells.")

    # Change dtype of kernel to float32 since we don't need the precision and f32 is faster
    kernel = kernel.astype("float32")

    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        kernel = cp.asarray(kernel)

    tpi = arcticdem_ds.dem - convolution.convolution_2d(arcticdem_ds.dem, kernel) / kernel.sum()
    tpi.attrs = {
        "long_name": "Topographic Position Index",
        "units": "m",
        "description": "The difference between the elevation of a cell and the mean elevation of the surrounding"
        f"cells {attr_cell_description}",
        "source": "ArcticDEM",
    }

    arcticdem_ds["tpi"] = tpi.compute()

    return arcticdem_ds

calculate_ttvi

calculate_ttvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate TTVI (Thiam's Transformed Vegetation Index) from spectral bands.

TTVI applies an absolute value transformation to NDVI before the square root, making it suitable for both positive and negative NDVI values.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing: - ndvi (float32): NDVI values (will be calculated if not present) - nir, red (float32): Required if NDVI not present

Returns:

  • xarray.DataArray

    xr.DataArray: TTVI values with attributes: - long_name: "TTVI"

Note

Formula: TTVI = sqrt(|NDVI| + 0.5)

If NDVI is already in the dataset, it will be reused to avoid recalculation.

References

Lemenkova, Polina. "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland" Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78. https://doi.org/10.2478/arls-2020-0021

Example
from darts_preprocessing import calculate_ttvi

ttvi = calculate_ttvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating TTVI", printer=logger.debug)
def calculate_ttvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate TTVI (Thiam's Transformed Vegetation Index) from spectral bands.

    TTVI applies an absolute value transformation to NDVI before the square root,
    making it suitable for both positive and negative NDVI values.

    Args:
        optical (xr.Dataset): Dataset containing:
            - ndvi (float32): NDVI values (will be calculated if not present)
            - nir, red (float32): Required if NDVI not present

    Returns:
        xr.DataArray: TTVI values with attributes:
            - long_name: "TTVI"

    Note:
        Formula: TTVI = sqrt(|NDVI| + 0.5)

        If NDVI is already in the dataset, it will be reused to avoid recalculation.

    References:
        Lemenkova, Polina.
        "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland"
        Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78.
        https://doi.org/10.2478/arls-2020-0021

    Example:
        ```python
        from darts_preprocessing import calculate_ttvi

        ttvi = calculate_ttvi(optical)
        ```

    """
    ndvi = optical["ndvi"] if "ndvi" in optical else calculate_ndvi(optical)
    ttvi = np.sqrt(np.abs(ndvi) + 0.5)
    ttvi = ttvi.assign_attrs({"long_name": "TTVI"}).rename("ttvi")
    return ttvi

calculate_tvi

calculate_tvi(optical: xarray.Dataset) -> xarray.DataArray

Calculate TVI (Transformed Vegetation Index) from spectral bands.

TVI applies a transformation to NDVI to enhance contrast and improve discrimination of vegetation conditions.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing: - ndvi (float32): NDVI values (will be calculated if not present) - nir, red (float32): Required if NDVI not present

Returns:

  • xarray.DataArray

    xr.DataArray: TVI values with attributes: - long_name: "TVI"

Note

Formula: TVI = sqrt(NDVI + 0.5)

If NDVI is already in the dataset, it will be reused to avoid recalculation.

References

Lemenkova, Polina. "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland" Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78. https://doi.org/10.2478/arls-2020-0021

Example
from darts_preprocessing import calculate_tvi

tvi = calculate_tvi(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating TVI", printer=logger.debug)
def calculate_tvi(optical: xr.Dataset) -> xr.DataArray:
    """Calculate TVI (Transformed Vegetation Index) from spectral bands.

    TVI applies a transformation to NDVI to enhance contrast and improve discrimination
    of vegetation conditions.

    Args:
        optical (xr.Dataset): Dataset containing:
            - ndvi (float32): NDVI values (will be calculated if not present)
            - nir, red (float32): Required if NDVI not present

    Returns:
        xr.DataArray: TVI values with attributes:
            - long_name: "TVI"

    Note:
        Formula: TVI = sqrt(NDVI + 0.5)

        If NDVI is already in the dataset, it will be reused to avoid recalculation.

    References:
        Lemenkova, Polina.
        "Hyperspectral Vegetation Indices Calculated by Qgis Using Landsat Tm Image: a Case Study of Northern Iceland"
        Advanced Research in Life Sciences, vol. 4, no. 1, Sciendo, 2020, pp. 70-78.
        https://doi.org/10.2478/arls-2020-0021

    Example:
        ```python
        from darts_preprocessing import calculate_tvi

        tvi = calculate_tvi(optical)
        ```

    """
    ndvi = optical["ndvi"] if "ndvi" in optical else calculate_ndvi(optical)
    tvi = np.sqrt(ndvi + 0.5)
    tvi = tvi.assign_attrs({"long_name": "TVI"}).rename("tvi")
    return tvi

calculate_vari

calculate_vari(optical: xarray.Dataset) -> xarray.DataArray

Calculate VARI (Visible Atmospherically Resistant Index) from spectral bands.

VARI uses only visible bands, designed to minimize atmospheric effects. Useful for RGB imagery without NIR band or for atmospheric correction validation.

Parameters:

  • optical (xarray.Dataset) –

    Dataset containing spectral bands: - green (float32): Green reflectance [0-1] - red (float32): Red reflectance [0-1] - blue (float32): Blue reflectance [0-1]

Returns:

  • xarray.DataArray

    xr.DataArray: VARI values with attributes: - long_name: "VARI"

Note

Formula: VARI = (Green - Red) / (Green + Red - Blue)

Input bands are clipped to [0, 1] to avoid numerical instabilities.

References

Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019. The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images. International Journal of Technology. Volume 10(7), pp. 1385-1394 https://doi.org/10.14716/ijtech.v10i7.3275

Example
from darts_preprocessing import calculate_vari

vari = calculate_vari(optical)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopwatch("Calculating VARI", printer=logger.debug)
def calculate_vari(optical: xr.Dataset) -> xr.DataArray:
    """Calculate VARI (Visible Atmospherically Resistant Index) from spectral bands.

    VARI uses only visible bands, designed to minimize atmospheric effects. Useful for
    RGB imagery without NIR band or for atmospheric correction validation.

    Args:
        optical (xr.Dataset): Dataset containing spectral bands:
            - green (float32): Green reflectance [0-1]
            - red (float32): Red reflectance [0-1]
            - blue (float32): Blue reflectance [0-1]

    Returns:
        xr.DataArray: VARI values with attributes:
            - long_name: "VARI"

    Note:
        Formula: VARI = (Green - Red) / (Green + Red - Blue)

        Input bands are clipped to [0, 1] to avoid numerical instabilities.

    References:
        Eng, L.S., Ismail, R., Hashim, W., Baharum, A., 2019.
        The Use of VARI, GLI, and VIgreen Formulas in Detecting Vegetation In aerial Images.
        International Journal of Technology. Volume 10(7), pp. 1385-1394
        https://doi.org/10.14716/ijtech.v10i7.3275

    Example:
        ```python
        from darts_preprocessing import calculate_vari

        vari = calculate_vari(optical)
        ```

    """
    g = optical["green"].clip(0, 1)
    r = optical["red"].clip(0, 1)
    b = optical["blue"].clip(0, 1)
    vari = (g - r) / (g + r - b)
    vari = vari.assign_attrs({"long_name": "VARI"}).rename("vari")
    return vari

calculate_vdvi

calculate_vdvi(optical: xarray.Dataset) -> xarray.DataArray

Alias for GLI (Green Leaf Index) from an xarray Dataset containing spectral bands.

Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
def calculate_vdvi(optical: xr.Dataset) -> xr.DataArray:
    """Alias for GLI (Green Leaf Index) from an xarray Dataset containing spectral bands."""  # noqa: DOC201
    logger.warning("VDVI is an alias for GLI, using GLI calculation.")
    return calculate_gli(optical)

calculate_vector_ruggedness_measure

calculate_vector_ruggedness_measure(
    arcticdem_ds: xarray.Dataset, neighborhood_size: int
) -> xarray.Dataset

Calculate the Vector Ruggedness Measure (VRM) from an ArcticDEM Dataset.

VRM quantifies terrain ruggedness using vector analysis of slope and aspect, providing a measure independent of absolute elevation. Values range from 0 (smooth) to 1 (rugged).

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    Dataset containing: - dem (float32): Digital Elevation Model - slope (float32): Slope in degrees (will be calculated if not present) - aspect (float32): Aspect in degrees (will be calculated if not present)

  • neighborhood_size (int) –

    Neighborhood window size for VRM calculation. Can be specified as string with units (e.g., "100m" or "10px").

Returns:

  • xarray.Dataset

    xr.Dataset: Input Dataset with new data variable added:

  • xarray.Dataset
    • vrm (float32): Vector Ruggedness Measure [0-1].

    • long_name: "Vector Ruggedness Measure"

    • description: Documents neighborhood size used
    • source: "ArcticDEM"
Note

VRM calculation:

  1. Converts slope and aspect to 3D unit vectors (x, y, z components)
  2. Sums vectors in the neighborhood window
  3. Calculates magnitude of resultant vector
  4. VRM = 1 - resultant magnitude

Flat areas (aspect = -1) are handled by setting aspect to 0.

Requires slope and aspect to be already calculated on the dataset.

References

Sappington, J.M., K.M. Longshore, and D.B. Thomson. 2007. Quantifying Landscape Ruggedness for Animal Habitat Analysis: A case Study Using Bighorn Sheep in the Mojave Desert. Journal of Wildlife Management. 71(5): 1419-1426.

Example
from darts_preprocessing import (
    calculate_slope, calculate_aspect,
    calculate_vector_ruggedness_measure
)

# VRM requires slope and aspect
arcticdem = calculate_slope(arcticdem)
arcticdem = calculate_aspect(arcticdem)
arcticdem_with_vrm = calculate_vector_ruggedness_measure(
    arcticdem_ds=arcticdem,
    neighborhood_size=100
)
Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopwatch("Calculating Vector Ruggedness Measure", printer=logger.debug)
def calculate_vector_ruggedness_measure(arcticdem_ds: xr.Dataset, neighborhood_size: int) -> xr.Dataset:
    """Calculate the Vector Ruggedness Measure (VRM) from an ArcticDEM Dataset.

    VRM quantifies terrain ruggedness using vector analysis of slope and aspect, providing
    a measure independent of absolute elevation. Values range from 0 (smooth) to 1 (rugged).

    Args:
        arcticdem_ds (xr.Dataset): Dataset containing:
            - dem (float32): Digital Elevation Model
            - slope (float32): Slope in degrees (will be calculated if not present)
            - aspect (float32): Aspect in degrees (will be calculated if not present)
        neighborhood_size (int): Neighborhood window size for VRM calculation.
            Can be specified as string with units (e.g., "100m" or "10px").

    Returns:
        xr.Dataset: Input Dataset with new data variable added:

        - vrm (float32): Vector Ruggedness Measure [0-1].

            - long_name: "Vector Ruggedness Measure"
            - description: Documents neighborhood size used
            - source: "ArcticDEM"

    Note:
        VRM calculation:

        1. Converts slope and aspect to 3D unit vectors (x, y, z components)
        2. Sums vectors in the neighborhood window
        3. Calculates magnitude of resultant vector
        4. VRM = 1 - resultant magnitude

        Flat areas (aspect = -1) are handled by setting aspect to 0.

        Requires slope and aspect to be already calculated on the dataset.

    References:
        Sappington, J.M., K.M. Longshore, and D.B. Thomson. 2007.
        Quantifying Landscape Ruggedness for Animal Habitat Analysis: A case Study Using Bighorn Sheep
        in the Mojave Desert. Journal of Wildlife Management. 71(5): 1419-1426.

    Example:
        ```python
        from darts_preprocessing import (
            calculate_slope, calculate_aspect,
            calculate_vector_ruggedness_measure
        )

        # VRM requires slope and aspect
        arcticdem = calculate_slope(arcticdem)
        arcticdem = calculate_aspect(arcticdem)
        arcticdem_with_vrm = calculate_vector_ruggedness_measure(
            arcticdem_ds=arcticdem,
            neighborhood_size=100
        )
        ```

    """
    # Calculate slope and aspect
    slope_rad = arcticdem_ds.slope * (np.pi / 180)  # Convert to radians
    aspect_rad = arcticdem_ds.aspect * (np.pi / 180)  # Convert to radians

    # Calculate x, y, and z components of unit vectors
    xy = np.sin(slope_rad)
    z = np.cos(slope_rad)

    # Handle flat areas (where aspect = -1)
    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        aspect_rad.variable._data = cp.where(aspect_rad.variable._data == -1, 0, aspect_rad.variable._data)
        # aspect_rad = aspect_rad.copy(data=aspect_rad_raw)
    else:
        aspect_rad = xr.where(aspect_rad == -1, 0, aspect_rad)
    x = np.sin(aspect_rad) * xy
    y = np.cos(aspect_rad) * xy

    # Get neighborhood_size in pixels
    neighborhood_size: Distance = Distance.parse(neighborhood_size, arcticdem_ds.odc.geobox.resolution.x)
    # Create convolution kernel for focal sum
    kernel = np.ones((neighborhood_size.pixel, neighborhood_size.pixel), dtype=float) / neighborhood_size.pixel**2
    kernel = convolution.custom_kernel(kernel)

    # Change dtype of kernel to float32 since we don't need the precision and f32 is faster
    kernel = kernel.astype("float32")

    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        kernel = cp.asarray(kernel)

    logger.debug(f"Calculating Vector Ruggedness Measure with square kernel of size {neighborhood_size} cells.")

    # Calculate sums of x, y, and z components in the neighborhood
    x_sum = convolution.convolution_2d(x, kernel)
    y_sum = convolution.convolution_2d(y, kernel)
    z_sum = convolution.convolution_2d(z, kernel)

    # Calculate the resultant vector magnitude
    vrm = 1 - np.sqrt(x_sum**2 + y_sum**2 + z_sum**2)

    vrm.attrs = {
        "long_name": "Vector Ruggedness Measure",
        "units": "",
        "description": (
            f"Vector ruggedness measure calculated using a {neighborhood_size} neighborhood. "
            "Values range from 0 (smooth) to 1 (most rugged)."
        ),
        "source": "ArcticDEM",
    }

    arcticdem_ds["vrm"] = vrm.compute()
    return arcticdem_ds

calculate_vigreen

calculate_vigreen(
    optical: xarray.Dataset,
) -> xarray.DataArray

Alias for VIGREEN (Vegetation Index Green) from an xarray Dataset containing spectral bands.

Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
def calculate_vigreen(optical: xr.Dataset) -> xr.DataArray:
    """Alias for VIGREEN (Vegetation Index Green) from an xarray Dataset containing spectral bands."""  # noqa: DOC201
    logger.warning("VIGREEN is an alias for GRVI, using GRVI calculation.")
    return calculate_grvi(optical)

preprocess_legacy_fast

preprocess_legacy_fast(
    ds_optical: xarray.Dataset,
    ds_arcticdem: xarray.Dataset,
    ds_tcvis: xarray.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_utils.cuda.DEFAULT_DEVICE,
) -> xarray.Dataset

Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

The processing steps are: - Calculate NDVI - Calculate slope and relative elevation from ArcticDEM - Merge everything into a single ds.

The main difference to preprocess_legacy is the new data concept of the arcticdem. Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw arcticdem data and calculates slope and relative elevation on the fly.

Parameters:

  • ds_optical (xarray.Dataset) –

    The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.

  • ds_arcticdem (xarray.Dataset) –

    The ArcticDEM dataset.

  • ds_tcvis (xarray.Dataset) –

    The TCVIS dataset.

  • tpi_outer_radius (int, default: 100 ) –

    The outer radius of the annulus kernel for the tpi calculation in m. Defaults to 100m.

  • tpi_inner_radius (int, default: 0 ) –

    The inner radius of the annulus kernel for the tpi calculation in m. Defaults to 0.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_utils.cuda.DEFAULT_DEVICE ) –

    The device to run the tpi and slope calculations on. If "cuda" take the first device (0), if int take the specified device. Defaults to "cuda" if cuda is available, else "cpu".

Returns:

Source code in darts-preprocessing/src/darts_preprocessing/legacy.py
@stopwatch("Preprocessing", printer=logger.debug)
def preprocess_legacy_fast(
    ds_optical: xr.Dataset,
    ds_arcticdem: xr.Dataset,
    ds_tcvis: xr.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

    The processing steps are:
    - Calculate NDVI
    - Calculate slope and relative elevation from ArcticDEM
    - Merge everything into a single ds.

    The main difference to preprocess_legacy is the new data concept of the arcticdem.
    Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw
    arcticdem data and calculates slope and relative elevation on the fly.

    Args:
        ds_optical (xr.Dataset): The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.
        ds_arcticdem (xr.Dataset): The ArcticDEM dataset.
        ds_tcvis (xr.Dataset): The TCVIS dataset.
        tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation
            in m. Defaults to 100m.
        tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation
            in m. Defaults to 0.
        device (Literal["cuda", "cpu"] | int, optional): The device to run the tpi and slope calculations on.
            If "cuda" take the first device (0), if int take the specified device.
            Defaults to "cuda" if cuda is available, else "cpu".

    Returns:
        xr.Dataset: The preprocessed dataset.

    """
    # Move to GPU for faster calculations
    ds_optical = move_to_device(ds_optical, device)
    # Calculate NDVI
    ds_optical["ndvi"] = calculate_ndvi(ds_optical)
    ds_optical = move_to_host(ds_optical)

    # Reproject TCVIS to optical data
    with stopwatch("Reprojecting TCVIS", printer=logger.debug):
        ds_tcvis = ds_tcvis.odc.reproject(ds_optical.odc.geobox, resampling="cubic")

    ds_optical["tc_brightness"] = ds_tcvis.tc_brightness
    ds_optical["tc_greenness"] = ds_tcvis.tc_greenness
    ds_optical["tc_wetness"] = ds_tcvis.tc_wetness

    # Calculate TPI and slope from ArcticDEM
    with stopwatch("Reprojecting ArcticDEM", printer=logger.debug):
        ds_arcticdem = ds_arcticdem.odc.reproject(ds_optical.odc.geobox.buffered(tpi_outer_radius), resampling="cubic")
    # Move to same device as optical
    ds_arcticdem = move_to_device(ds_arcticdem, device)
    ds_arcticdem = preprocess_legacy_arcticdem_fast(ds_arcticdem, tpi_outer_radius, tpi_inner_radius)
    ds_arcticdem = move_to_host(ds_arcticdem)

    ds_arcticdem = ds_arcticdem.odc.crop(ds_optical.odc.geobox.extent)
    # For some reason, we need to reindex, because the reproject + crop of the arcticdem sometimes results
    # in floating point errors. These error are at the order of 1e-10, hence, way below millimeter precision.
    ds_arcticdem = ds_arcticdem.reindex_like(ds_optical)

    ds_optical["dem"] = ds_arcticdem.dem
    ds_optical["relative_elevation"] = ds_arcticdem.tpi
    ds_optical["slope"] = ds_arcticdem.slope
    ds_optical["arcticdem_data_mask"] = ds_arcticdem.arcticdem_data_mask

    return ds_optical

preprocess_v2

preprocess_v2(
    ds_optical: xarray.Dataset,
    ds_arcticdem: xarray.Dataset | None,
    ds_tcvis: xarray.Dataset | None,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_utils.cuda.DEFAULT_DEVICE,
) -> xarray.Dataset

Preprocess optical data with modern (DARTS v2) preprocessing steps.

This function combines optical imagery with terrain (ArcticDEM) and temporal vegetation indices (TCVIS) to create a multi-source feature dataset for segmentation. All auxiliary data sources are reprojected and cropped to match the optical data's extent and resolution.

Processing steps
  1. Calculate NDVI from optical bands
  2. If TCVIS provided: Reproject and merge Tasseled Cap trends
  3. If ArcticDEM provided: Calculate terrain features (TPI, slope, hillshade, aspect, curvature) using solar geometry from optical data attributes

Parameters:

  • ds_optical (xarray.Dataset) –

    Optical imagery dataset (PlanetScope or Sentinel-2) containing: - Required variables: blue, green, red, nir (float32, reflectance values) - Required variables: quality_data_mask, valid_data_mask (uint8) - Required attributes: azimuth (float), elevation (float) for hillshade calculation

  • ds_arcticdem (xarray.Dataset | None) –

    ArcticDEM dataset containing 'dem' (float32) and 'arcticdem_data_mask' (uint8). If None, terrain features are skipped.

  • ds_tcvis (xarray.Dataset | None) –

    TCVIS dataset containing tc_brightness, tc_greenness, tc_wetness (float). If None, TCVIS features are skipped.

  • tpi_outer_radius (int, default: 100 ) –

    Outer radius for TPI calculation in meters. Defaults to 100m.

  • tpi_inner_radius (int, default: 0 ) –

    Inner radius for TPI annulus kernel in meters. Set to 0 for circular kernel. Defaults to 0.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_utils.cuda.DEFAULT_DEVICE ) –

    Device for GPU-accelerated computations (NDVI, TPI, slope). Use "cuda" for first GPU, int for specific GPU, or "cpu". Defaults to "cuda" if available, else "cpu".

Returns:

  • xarray.Dataset

    xr.Dataset: Preprocessed dataset with all input optical variables plus:

  • xarray.Dataset

    Added from optical processing: - ndvi (float32): Normalized Difference Vegetation Index Attributes: long_name="NDVI"

  • xarray.Dataset

    Added from TCVIS (if ds_tcvis provided): - tc_brightness (float): Tasseled Cap brightness trend - tc_greenness (float): Tasseled Cap greenness trend - tc_wetness (float): Tasseled Cap wetness trend

  • xarray.Dataset

    Added from ArcticDEM (if ds_arcticdem provided): - dem (float32): Elevation in meters - relative_elevation (float32): Topographic Position Index (TPI) Attributes: long_name="Topographic Position Index (TPI)" - slope (float32): Slope in degrees [0-90] Attributes: long_name="Slope" - hillshade (uint8): Hillshade values [0-255] Attributes: long_name="Hillshade" - aspect (float32): Aspect in degrees [0-360] Attributes: long_name="Aspect" - curvature (float32): Surface curvature Attributes: long_name="Curvature" - arcticdem_data_mask (uint8): DEM validity mask

Note

Attribute usage: - azimuth attribute from ds_optical: Used for hillshade calculation (solar azimuth angle). Falls back to 225° if missing or invalid. - elevation attribute from ds_optical: Used for hillshade calculation (solar elevation angle). Falls back to 25° if missing or invalid.

Processing behavior: - If both ds_tcvis and ds_arcticdem are None, only NDVI is calculated. - ArcticDEM is buffered by tpi_outer_radius before reprojection to avoid edge effects, then cropped back to optical extent after terrain feature calculation. - Reprojection uses cubic resampling for smooth terrain features. - GPU acceleration (if device="cuda") significantly speeds up TPI and slope calculations.

Example

Complete preprocessing with all data sources:

from darts_preprocessing import preprocess_v2
from darts_acquisition import load_cdse_s2_sr_scene, load_arcticdem, load_tcvis

# Load optical data
optical = load_cdse_s2_sr_scene(s2_scene_id, ...)

# Load auxiliary data
arcticdem = load_arcticdem(optical.odc.geobox, ...)
tcvis = load_tcvis(optical.odc.geobox, ...)

# Preprocess
preprocessed = preprocess_v2(
    ds_optical=optical,
    ds_arcticdem=arcticdem,
    ds_tcvis=tcvis,
    tpi_outer_radius=100,
    tpi_inner_radius=0,
    device="cuda"
)

# Result contains: blue, green, red, nir, ndvi, tc_brightness, tc_greenness,
# tc_wetness, dem, relative_elevation, slope, hillshade, aspect, curvature
Source code in darts-preprocessing/src/darts_preprocessing/v2.py
@stopwatch("Preprocessing", printer=logger.debug)
def preprocess_v2(
    ds_optical: xr.Dataset,
    ds_arcticdem: xr.Dataset | None,
    ds_tcvis: xr.Dataset | None,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Preprocess optical data with modern (DARTS v2) preprocessing steps.

    This function combines optical imagery with terrain (ArcticDEM) and temporal vegetation
    indices (TCVIS) to create a multi-source feature dataset for segmentation. All auxiliary
    data sources are reprojected and cropped to match the optical data's extent and resolution.

    Processing steps:
        1. Calculate NDVI from optical bands
        2. If TCVIS provided: Reproject and merge Tasseled Cap trends
        3. If ArcticDEM provided: Calculate terrain features (TPI, slope, hillshade, aspect, curvature)
           using solar geometry from optical data attributes

    Args:
        ds_optical (xr.Dataset): Optical imagery dataset (PlanetScope or Sentinel-2) containing:
            - Required variables: blue, green, red, nir (float32, reflectance values)
            - Required variables: quality_data_mask, valid_data_mask (uint8)
            - Required attributes: azimuth (float), elevation (float) for hillshade calculation
        ds_arcticdem (xr.Dataset | None): ArcticDEM dataset containing 'dem' (float32) and
            'arcticdem_data_mask' (uint8). If None, terrain features are skipped.
        ds_tcvis (xr.Dataset | None): TCVIS dataset containing tc_brightness, tc_greenness,
            tc_wetness (float). If None, TCVIS features are skipped.
        tpi_outer_radius (int, optional): Outer radius for TPI calculation in meters.
            Defaults to 100m.
        tpi_inner_radius (int, optional): Inner radius for TPI annulus kernel in meters.
            Set to 0 for circular kernel. Defaults to 0.
        device (Literal["cuda", "cpu"] | int, optional): Device for GPU-accelerated computations
            (NDVI, TPI, slope). Use "cuda" for first GPU, int for specific GPU, or "cpu".
            Defaults to "cuda" if available, else "cpu".

    Returns:
        xr.Dataset: Preprocessed dataset with all input optical variables plus:

        Added from optical processing:
            - ndvi (float32): Normalized Difference Vegetation Index
              Attributes: long_name="NDVI"

        Added from TCVIS (if ds_tcvis provided):
            - tc_brightness (float): Tasseled Cap brightness trend
            - tc_greenness (float): Tasseled Cap greenness trend
            - tc_wetness (float): Tasseled Cap wetness trend

        Added from ArcticDEM (if ds_arcticdem provided):
            - dem (float32): Elevation in meters
            - relative_elevation (float32): Topographic Position Index (TPI)
              Attributes: long_name="Topographic Position Index (TPI)"
            - slope (float32): Slope in degrees [0-90]
              Attributes: long_name="Slope"
            - hillshade (uint8): Hillshade values [0-255]
              Attributes: long_name="Hillshade"
            - aspect (float32): Aspect in degrees [0-360]
              Attributes: long_name="Aspect"
            - curvature (float32): Surface curvature
              Attributes: long_name="Curvature"
            - arcticdem_data_mask (uint8): DEM validity mask

    Note:
        Attribute usage:
        - `azimuth` attribute from ds_optical: Used for hillshade calculation (solar azimuth angle).
          Falls back to 225° if missing or invalid.
        - `elevation` attribute from ds_optical: Used for hillshade calculation (solar elevation angle).
          Falls back to 25° if missing or invalid.

        Processing behavior:
        - If both ds_tcvis and ds_arcticdem are None, only NDVI is calculated.
        - ArcticDEM is buffered by tpi_outer_radius before reprojection to avoid edge effects,
          then cropped back to optical extent after terrain feature calculation.
        - Reprojection uses cubic resampling for smooth terrain features.
        - GPU acceleration (if device="cuda") significantly speeds up TPI and slope calculations.

    Example:
        Complete preprocessing with all data sources:

        ```python
        from darts_preprocessing import preprocess_v2
        from darts_acquisition import load_cdse_s2_sr_scene, load_arcticdem, load_tcvis

        # Load optical data
        optical = load_cdse_s2_sr_scene(s2_scene_id, ...)

        # Load auxiliary data
        arcticdem = load_arcticdem(optical.odc.geobox, ...)
        tcvis = load_tcvis(optical.odc.geobox, ...)

        # Preprocess
        preprocessed = preprocess_v2(
            ds_optical=optical,
            ds_arcticdem=arcticdem,
            ds_tcvis=tcvis,
            tpi_outer_radius=100,
            tpi_inner_radius=0,
            device="cuda"
        )

        # Result contains: blue, green, red, nir, ndvi, tc_brightness, tc_greenness,
        # tc_wetness, dem, relative_elevation, slope, hillshade, aspect, curvature
        ```

    """
    # Move to GPU for faster calculations
    ds_optical = move_to_device(ds_optical, device)
    # Calculate NDVI
    ds_optical["ndvi"] = calculate_ndvi(ds_optical)
    ds_optical = move_to_host(ds_optical)

    if ds_tcvis is None and ds_arcticdem is None:
        logger.debug("No auxiliary data provided. Only NDVI was calculated.")
        return ds_optical

    if ds_tcvis is not None:
        # Reproject TCVIS to optical data
        with stopwatch("Reprojecting TCVIS", printer=logger.debug):
            # *: Reprojecting this way will not alter the datatype of the data!
            # Should be uint8 before and after reprojection.
            ds_tcvis = ds_tcvis.odc.reproject(ds_optical.odc.geobox, resampling="cubic")

        # !: Reprojecting with f64 coordinates and values behind the decimal point can result in a coordinate missmatch:
        # E.g. ds_optical has x coordinates [2.123, ...] then is can happen that the
        # reprojected ds_tcvis coordinates are [2.12300001, ...]
        # This results is all-nan assigments later when adding the variables of the reprojected dataset to the original
        assert (ds_optical.x == ds_tcvis.x).all(), "x coordinates do not match! See code comment above"
        assert (ds_optical.y == ds_tcvis.y).all(), "y coordinates do not match! See code comment above"

        # ?: Do ds_tcvis and ds_optical now share the same memory on the GPU?
        #  or do I need to delete ds_tcvis to free memory?
        # Same question for ArcticDEM
        ds_optical["tc_brightness"] = ds_tcvis.tc_brightness
        ds_optical["tc_greenness"] = ds_tcvis.tc_greenness
        ds_optical["tc_wetness"] = ds_tcvis.tc_wetness

    if ds_arcticdem is not None:
        # Calculate TPI and slope from ArcticDEM
        with stopwatch("Reprojecting ArcticDEM", printer=logger.debug):
            ds_arcticdem = ds_arcticdem.odc.reproject(
                ds_optical.odc.geobox.buffered(tpi_outer_radius), resampling="cubic"
            )
        # Move to same device as optical
        ds_arcticdem = move_to_device(ds_arcticdem, device)

        assert (ds_optical.x == ds_arcticdem.x).all(), "x coordinates do not match! See code comment above"
        assert (ds_optical.y == ds_arcticdem.y).all(), "y coordinates do not match! See code comment above"

        azimuth, angle_altitude = get_azimuth_and_elevation(ds_optical)
        ds_arcticdem = preprocess_arcticdem(
            ds_arcticdem,
            tpi_outer_radius,
            tpi_inner_radius,
            azimuth,
            angle_altitude,
        )
        ds_arcticdem = move_to_host(ds_arcticdem)

        # TODO: Check if crop can be done with apply_mask = False
        # -> Then the type conversion of the arcticdem data mask would not be necessary anymore
        # -> And this would also allow to keep the data on the GPU
        ds_arcticdem = ds_arcticdem.odc.crop(ds_optical.odc.geobox.extent)
        # For some reason, we need to reindex, because the reproject + crop of the arcticdem sometimes results
        # in floating point errors. These error are at the order of 1e-10, hence, way below millimeter precision.
        ds_arcticdem["x"] = ds_optical.x
        ds_arcticdem["y"] = ds_optical.y

        ds_optical["dem"] = ds_arcticdem.dem
        ds_optical["relative_elevation"] = ds_arcticdem.tpi
        ds_optical["slope"] = ds_arcticdem.slope
        ds_optical["hillshade"] = ds_arcticdem.hillshade
        ds_optical["aspect"] = ds_arcticdem.aspect
        ds_optical["curvature"] = ds_arcticdem.curvature
        ds_optical["arcticdem_data_mask"] = ds_arcticdem.arcticdem_data_mask.astype("uint8")

    return ds_optical