Skip to content

darts_postprocessing.prepare_export

Prepare the export, e.g. binarize the data and convert the float probabilities to uint8.

Parameters:

  • tile (xarray.Dataset) –

    Input tile from inference and / or an ensemble.

  • bin_threshold (float, default: 0.5 ) –

    The threshold to binarize the probabilities. Defaults to 0.5.

  • mask_erosion_size (int, default: 10 ) –

    The size of the disk to use for mask erosion and the edge-cropping. Defaults to 10.

  • min_object_size (int, default: 32 ) –

    The minimum object size to keep, in pixels. Defaults to 32.

  • quality_level (int | str, default: 0 ) –

    The quality level to use for the mask. If a string is given, it is mapped to an int: high_quality -> 2, low_quality -> 1, none -> 0 (apply no masking). Defaults to 0.

  • ensemble_subsets (list[str], default: [] ) –

    The ensemble subsets to use for the binarization. Defaults to [].

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_postprocessing.postprocess.DEFAULT_DEVICE ) –

    The device to use for dilation. Defaults to "cuda" if cuda for cucim is available, else "cpu".

Returns:

  • xarray.Dataset –

    Output tile.

Source code in darts-postprocessing/src/darts_postprocessing/postprocess.py
@stopuhr.funkuhr(
    "Preparing export",
    printer=logger.debug,
    print_kwargs=["bin_threshold", "mask_erosion_size", "min_object_size", "quality_level", "ensemble_subsets"],
)
def prepare_export(
    tile: xr.Dataset,
    bin_threshold: float = 0.5,
    mask_erosion_size: int = 10,
    min_object_size: int = 32,
    quality_level: int | Literal["high_quality", "low_quality", "none"] = 0,
    ensemble_subsets: list[str] | None = None,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Prepare the export, e.g. binarize the data and convert the float probabilities to uint8.

    The passed tile is modified in place (new variables are assigned to it) and returned.
    The following variables are written:

    - ``extent``: The (optionally eroded) quality mask used for the export.
    - ``binarized_segmentation``: The binarized version of ``probabilities``.
    - ``probabilities``: Overwritten with percent values stored as uint8, where 255 marks no-data.
    - ``binarized_segmentation-<subset>`` / ``probabilities-<subset>``: Same as above, for every
      entry of ``ensemble_subsets``.

    Args:
        tile (xr.Dataset): Input tile from inference and / or an ensemble.
            Must contain the variables "quality_data_mask" and "probabilities", plus
            "probabilities-<subset>" for every requested ensemble subset.
        bin_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5.
        mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping.
            Only used if the resolved quality level is greater than 0. Defaults to 10.
        min_object_size (int, optional): The minimum object size to keep, in pixels. Defaults to 32.
        quality_level (int | str, optional): The quality level to use for the mask.
            If a string is given, it is mapped to an int:
            high_quality -> 2, low_quality -> 1, none -> 0 (apply no masking). Defaults to 0.
        ensemble_subsets (list[str] | None, optional): The ensemble subsets to use for the binarization,
            for example ["tcvis"] for the layer "probabilities-tcvis".
            Defaults to None, which is treated like an empty list (no subsets).
        device (Literal["cuda", "cpu"] | int, optional): The device to use for dilation.
            Defaults to "cuda" if cuda for cucim is available, else "cpu".

    Returns:
        xr.Dataset: Output tile.

    Raises:
        KeyError: If `quality_level` is a string other than "high_quality", "low_quality" or "none".

    """
    # A None sentinel replaces the former `[]` default, so no list object is shared between calls.
    if ensemble_subsets is None:
        ensemble_subsets = []

    # Resolve the symbolic quality names to their integer levels.
    quality_level = (
        quality_level
        if isinstance(quality_level, int)
        else {"high_quality": 2, "low_quality": 1, "none": 0}[quality_level]
    )

    # Everything with at least the requested quality is considered valid.
    mask = tile["quality_data_mask"] >= quality_level
    # The erosion is skipped entirely when no masking is requested (quality level 0).
    if quality_level > 0:
        mask = erode_mask(mask, mask_erosion_size, device)  # 0=positive, 1=negative
    tile["extent"] = mask.copy()
    tile["extent"].attrs = {
        "long_name": "Extent of the segmentation",
    }

    def _prep_layer(tile, layername, binarized_layer_name):
        """Binarize `layername` into `binarized_layer_name` and convert `layername` to uint8 percent."""
        # Binarize the segmentation
        tile[binarized_layer_name] = binarize(tile[layername], bin_threshold, min_object_size, mask, device)
        tile[binarized_layer_name].attrs = {
            "long_name": "Binarized Segmentation",
        }

        # Convert the probabilities to uint8
        # Same but this time with 255 as no-data
        # But first check if this step was already run:
        # values above 1 mean the layer no longer holds 0..1 probabilities.
        # NOTE(review): this check happens after the binarization above, so on an already converted
        # layer the binarization is recomputed from percent values - confirm this is intended.
        if tile[layername].max() > 1:
            return tile

        # Scale to percent; NaNs become the no-data value before the cast to uint8.
        intprobs = (tile[layername] * 100).fillna(255).astype("uint8")
        # Pixels outside of the mask are set to no-data as well.
        tile[layername] = xr.where(mask, intprobs, 255)
        tile[layername].attrs = {
            "long_name": "Probabilities",
            "units": "%",
        }
        tile[layername] = tile[layername].rio.write_nodata(255)
        return tile

    tile = _prep_layer(tile, "probabilities", "binarized_segmentation")

    # get the names of the model probabilities if available
    # for example 'tcvis' from 'probabilities-tcvis'
    for ensemble_subset in ensemble_subsets:
        tile = _prep_layer(tile, f"probabilities-{ensemble_subset}", f"binarized_segmentation-{ensemble_subset}")

    return tile