Skip to content

darts_preprocessing

Data preprocessing and feature engineering for the DARTS dataset.

Functions:

Attributes:

__version__ module-attribute

__version__ = importlib.metadata.version('darts-nextgen')

calculate_ndvi

calculate_ndvi(
    planet_scene_dataset: xarray.Dataset,
    nir_band: str = "nir",
    red_band: str = "red",
) -> xarray.Dataset

Calculate NDVI from an xarray Dataset containing spectral bands.

Example
ndvi_data = calculate_ndvi(planet_scene_dataset)

Parameters:

  • planet_scene_dataset (xarray.Dataset) –

    The xarray Dataset containing the spectral bands, where the bands are indexed along a dimension (e.g., 'band'). The Dataset should have dimensions including 'band', 'y', and 'x'.

  • nir_band (str, default: 'nir' ) –

    The name of the NIR band in the Dataset (default is "nir"). This name should correspond to the variable name for the NIR band in the 'band' dimension. Defaults to "nir".

  • red_band (str, default: 'red' ) –

    The name of the Red band in the Dataset (default is "red"). This name should correspond to the variable name for the Red band in the 'band' dimension. Defaults to "red".

Returns:

  • xarray.Dataset

    xr.Dataset: A new Dataset containing the calculated NDVI values. The resulting Dataset will have dimensions (band: 1, y: ..., x: ...) and will be named "ndvi".

Notes

NDVI (Normalized Difference Vegetation Index) is calculated using the formula: NDVI = (NIR - Red) / (NIR + Red)

This index is commonly used in remote sensing to assess vegetation health and density.

Source code in darts-preprocessing/src/darts_preprocessing/engineering/indices.py
@stopuhr.funkuhr("Calculating NDVI", printer=logger.debug, print_kwargs=["nir_band", "red_band"])
def calculate_ndvi(planet_scene_dataset: xr.Dataset, nir_band: str = "nir", red_band: str = "red") -> xr.Dataset:
    """Calculate NDVI from an xarray Dataset containing spectral bands.

    Example:
        ```python
        ndvi_data = calculate_ndvi(planet_scene_dataset)
        ```

    Args:
        planet_scene_dataset (xr.Dataset): The xarray Dataset containing the spectral bands, where the bands are indexed
            along a dimension (e.g., 'band'). The Dataset should have dimensions including 'band', 'y', and 'x'.
        nir_band (str, optional): The name of the NIR band in the Dataset (default is "nir"). This name should
            correspond to the variable name for the NIR band in the 'band' dimension. Defaults to "nir".
        red_band (str, optional): The name of the Red band in the Dataset (default is "red"). This name should
            correspond to the variable name for the Red band in the 'band' dimension. Defaults to "red".

    Returns:
        xr.Dataset: A new Dataset containing the calculated NDVI values. The resulting Dataset will have
            dimensions (band: 1, y: ..., x: ...) and will be named "ndvi".


    Notes:
        NDVI (Normalized Difference Vegetation Index) is calculated using the formula:
            NDVI = (NIR - Red) / (NIR + Red)

        This index is commonly used in remote sensing to assess vegetation health and density.

    """
    # Calculate NDVI using the formula
    nir = planet_scene_dataset[nir_band].astype("float32")
    r = planet_scene_dataset[red_band].astype("float32")
    ndvi = (nir - r) / (nir + r)

    # Scale to 0 - 20000 (for later conversion to uint16)
    ndvi = (ndvi.clip(-1, 1) + 1) * 1e4
    # Make nan to 0
    ndvi = ndvi.fillna(0).rio.write_nodata(0)
    # Convert to uint16
    ndvi = ndvi.astype("uint16")

    ndvi = ndvi.assign_attrs({"data_source": "planet", "long_name": "NDVI"}).to_dataset(name="ndvi")
    return ndvi

calculate_slope

calculate_slope(
    arcticdem_ds: xarray.Dataset,
) -> xarray.Dataset

Calculate the slope of the terrain surface from an ArcticDEM Dataset.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    The ArcticDEM Dataset containing the 'dem' variable.

Returns:

  • xarray.Dataset

    xr.Dataset: The input Dataset with the calculated slope added as a new variable 'slope'.

Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopuhr.funkuhr("Calculating slope", printer=logger.debug)
def calculate_slope(arcticdem_ds: xr.Dataset) -> xr.Dataset:
    """Calculate the slope of the terrain surface from an ArcticDEM Dataset.

    Args:
        arcticdem_ds (xr.Dataset): The ArcticDEM Dataset containing the 'dem' variable.

    Returns:
        xr.Dataset: The input Dataset with the calculated slope added as a new variable 'slope'.

    """
    slope_deg = slope(arcticdem_ds.dem)
    slope_deg.attrs = {
        "long_name": "Slope",
        "units": "degrees",
        "description": "The slope of the terrain surface in degrees.",
        "source": "ArcticDEM",
        "_FillValue": float("nan"),
    }
    arcticdem_ds["slope"] = slope_deg.compute()
    return arcticdem_ds

calculate_topographic_position_index

calculate_topographic_position_index(
    arcticdem_ds: xarray.Dataset,
    outer_radius: int,
    inner_radius: int,
) -> xarray.Dataset

Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

Parameters:

  • arcticdem_ds (xarray.Dataset) –

    The ArcticDEM Dataset containing the 'dem' variable.

  • outer_radius (int) –

    The outer radius of the annulus kernel in m.

  • inner_radius (int) –

    The inner radius of the annulus kernel in m.

Returns:

  • xarray.Dataset

    xr.Dataset: The input Dataset with the calculated TPI added as a new variable 'tpi'.

Source code in darts-preprocessing/src/darts_preprocessing/engineering/arcticdem.py
@stopuhr.funkuhr("Calculating TPI", printer=logger.debug, print_kwargs=["outer_radius", "inner_radius"])
def calculate_topographic_position_index(arcticdem_ds: xr.Dataset, outer_radius: int, inner_radius: int) -> xr.Dataset:
    """Calculate the Topographic Position Index (TPI) from an ArcticDEM Dataset.

    Args:
        arcticdem_ds (xr.Dataset): The ArcticDEM Dataset containing the 'dem' variable.
        outer_radius (int, optional): The outer radius of the annulus kernel in m.
        inner_radius (int, optional): The inner radius of the annulus kernel in m.

    Returns:
        xr.Dataset: The input Dataset with the calculated TPI added as a new variable 'tpi'.

    """
    cellsize_x, cellsize_y = convolution.calc_cellsize(arcticdem_ds.dem)  # Should be equal to the resolution of the DEM
    # Use an annulus kernel if inner_radius is greater than 0
    outer_radius_m = f"{outer_radius}m"
    outer_radius_px = f"{ceil(outer_radius / cellsize_x)}px"
    if inner_radius > 0:
        inner_radius_m = f"{inner_radius}m"
        inner_radius_px = f"{ceil(inner_radius / cellsize_x)}px"
        kernel = convolution.annulus_kernel(cellsize_x, cellsize_y, outer_radius_m, inner_radius_m)
        attr_cell_description = (
            f"within a ring at a distance of {inner_radius_px}-{outer_radius_px} cells "
            f"({inner_radius_m}-{outer_radius_m}) away from the focal cell."
        )
        logger.debug(
            f"Calculating Topographic Position Index with annulus kernel of "
            f"{inner_radius_px}-{outer_radius_px} ({inner_radius_m}-{outer_radius_m}) cells."
        )
    else:
        kernel = convolution.circle_kernel(cellsize_x, cellsize_y, outer_radius_m)
        attr_cell_description = (
            f"within a circle at a distance of {outer_radius_px} cells ({outer_radius_m}) away from the focal cell."
        )
        logger.debug(
            f"Calculating Topographic Position Index with circle kernel of {outer_radius_px} ({outer_radius_m}) cells."
        )

    if has_cuda_and_cupy() and arcticdem_ds.cupy.is_cupy:
        kernel = cp.asarray(kernel)

    tpi = arcticdem_ds.dem - convolution.convolution_2d(arcticdem_ds.dem, kernel) / kernel.sum()
    tpi.attrs = {
        "long_name": "Topographic Position Index",
        "units": "m",
        "description": "The difference between the elevation of a cell and the mean elevation of the surrounding"
        f"cells {attr_cell_description}",
        "source": "ArcticDEM",
        "_FillValue": float("nan"),
    }

    arcticdem_ds["tpi"] = tpi.compute()

    return arcticdem_ds

preprocess_legacy_fast

preprocess_legacy_fast(
    ds_merged: xarray.Dataset,
    ds_arcticdem: xarray.Dataset,
    ds_tcvis: xarray.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_preprocessing.preprocess.DEFAULT_DEVICE,
) -> xarray.Dataset

Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

The processing steps are: - Calculate NDVI - Calculate slope and relative elevation from ArcticDEM - Merge everything into a single ds.

The main difference to preprocess_legacy is the new data concept of the arcticdem. Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw arcticdem data and calculates slope and relative elevation on the fly.

Parameters:

  • ds_merged (xarray.Dataset) –

    The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.

  • ds_arcticdem (xarray.Dataset) –

    The ArcticDEM dataset.

  • ds_tcvis (xarray.Dataset) –

    The TCVIS dataset.

  • tpi_outer_radius (int, default: 100 ) –

    The outer radius of the annulus kernel for the tpi calculation in m. Defaults to 100m.

  • tpi_inner_radius (int, default: 0 ) –

    The inner radius of the annulus kernel for the tpi calculation in m. Defaults to 0.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_preprocessing.preprocess.DEFAULT_DEVICE ) –

    The device to run the tpi and slope calculations on. If "cuda" take the first device (0), if int take the specified device. Defaults to "cuda" if cuda is available, else "cpu".

Returns:

Source code in darts-preprocessing/src/darts_preprocessing/preprocess.py
@stopuhr.funkuhr("Preprocessing", printer=logger.debug)
def preprocess_legacy_fast(
    ds_merged: xr.Dataset,
    ds_arcticdem: xr.Dataset,
    ds_tcvis: xr.Dataset,
    tpi_outer_radius: int = 100,
    tpi_inner_radius: int = 0,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Preprocess optical data with legacy (DARTS v1) preprocessing steps, but with new data concepts.

    The processing steps are:
    - Calculate NDVI
    - Calculate slope and relative elevation from ArcticDEM
    - Merge everything into a single ds.

    The main difference to preprocess_legacy is the new data concept of the arcticdem.
    Instead of using already preprocessed arcticdem data which are loaded from a VRT, this step expects the raw
    arcticdem data and calculates slope and relative elevation on the fly.

    Args:
        ds_merged (xr.Dataset): The Planet scene optical data or Sentinel 2 scene optical dataset including data_masks.
        ds_arcticdem (xr.Dataset): The ArcticDEM dataset.
        ds_tcvis (xr.Dataset): The TCVIS dataset.
        tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation
            in m. Defaults to 100m.
        tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation
            in m. Defaults to 0.
        device (Literal["cuda", "cpu"] | int, optional): The device to run the tpi and slope calculations on.
            If "cuda" take the first device (0), if int take the specified device.
            Defaults to "cuda" if cuda is available, else "cpu".

    Returns:
        xr.Dataset: The preprocessed dataset.

    """
    # Calculate NDVI
    ds_merged["ndvi"] = calculate_ndvi(ds_merged).ndvi

    # Reproject TCVIS to optical data
    with stopuhr.stopuhr("Reprojecting TCVIS", printer=logger.debug):
        ds_tcvis = ds_tcvis.odc.reproject(ds_merged.odc.geobox, resampling="cubic")

    ds_merged["tc_brightness"] = ds_tcvis.tc_brightness
    ds_merged["tc_greenness"] = ds_tcvis.tc_greenness
    ds_merged["tc_wetness"] = ds_tcvis.tc_wetness

    # Calculate TPI and slope from ArcticDEM
    with stopuhr.stopuhr("Reprojecting ArcticDEM", printer=logger.debug):
        ds_arcticdem = ds_arcticdem.odc.reproject(ds_merged.odc.geobox.buffered(tpi_outer_radius), resampling="cubic")

    ds_arcticdem = preprocess_legacy_arcticdem_fast(ds_arcticdem, tpi_outer_radius, tpi_inner_radius, device)
    ds_arcticdem = ds_arcticdem.odc.crop(ds_merged.odc.geobox.extent)
    # For some reason, we need to reindex, because the reproject + crop of the arcticdem sometimes results
    # in floating point errors. These error are at the order of 1e-10, hence, way below millimeter precision.
    ds_arcticdem = ds_arcticdem.reindex_like(ds_merged)

    ds_merged["dem"] = ds_arcticdem.dem
    ds_merged["relative_elevation"] = ds_arcticdem.tpi
    ds_merged["slope"] = ds_arcticdem.slope
    ds_merged["arcticdem_data_mask"] = ds_arcticdem.datamask

    # Update datamask with arcticdem mask
    # with xr.set_options(keep_attrs=True):
    #     ds_merged["quality_data_mask"] = ds_merged.quality_data_mask * ds_arcticdem.datamask
    # ds_merged.quality_data_mask.attrs["data_source"] += " + ArcticDEM"

    return ds_merged