Skip to content

darts_acquisition

Acquisition of data from various sources for the DARTS dataset.

__version__ module-attribute

__version__ = importlib.metadata.version('darts-nextgen')

download_admin_files

download_admin_files(admin_dir: pathlib.Path)

Download the admin files for the regions.

Files will be stored under [admin_dir]/adm1.shp and [admin_dir]/adm2.shp.

Parameters:

  • admin_dir (pathlib.Path) –

    The path to the admin files.

Source code in darts-acquisition/src/darts_acquisition/admin.py
@stopwatch.f("Downloading admin files", printer=logger.debug)
def download_admin_files(admin_dir: Path):
    """Download the admin files for the regions.

    Files will be stored under [admin_dir]/adm1.shp and [admin_dir]/adm2.shp.

    Args:
        admin_dir (Path): The path to the admin files.

    """
    # Download the admin files
    admin_1_url = "https://github.com/wmgeolab/geoBoundaries/raw/main/releaseData/CGAZ/geoBoundariesCGAZ_ADM1.zip"
    admin_2_url = "https://github.com/wmgeolab/geoBoundaries/raw/main/releaseData/CGAZ/geoBoundariesCGAZ_ADM2.zip"

    admin_dir.mkdir(exist_ok=True, parents=True)

    logger.debug(f"Downloading {admin_1_url} to {admin_dir.resolve()}")
    _download_zip(admin_1_url, admin_dir)

    logger.debug(f"Downloading {admin_2_url} to {admin_dir.resolve()}")
    _download_zip(admin_2_url, admin_dir)

download_arcticdem

download_arcticdem(
    aoi: geopandas.GeoDataFrame,
    data_dir: pathlib.Path | str,
    resolution: darts_acquisition.arcticdem.RESOLUTIONS,
) -> None

Download ArcticDEM data for the specified area of interest.

This function downloads ArcticDEM elevation tiles from AWS S3 for the given area of interest and stores them in a local icechunk data store for efficient access.

Parameters:

  • aoi (geopandas.GeoDataFrame) –

    Area of interest for which to download ArcticDEM data. Can be in any CRS; will be reprojected to EPSG:3413 (ArcticDEM's native CRS).

  • data_dir (pathlib.Path | str) –

    Path to the icechunk data directory (must have .icechunk suffix). Must contain the resolution in the name (e.g., "arcticdem_2m.icechunk").

  • resolution (typing.Literal[2, 10, 32]) –

    The resolution of the ArcticDEM data in meters. Must match the resolution indicated in the data_dir name.

Note

This function automatically configures AWS access with unsigned requests to the public ArcticDEM S3 bucket. No AWS credentials are required.

Example

Download ArcticDEM for a study area:

import geopandas as gpd
from shapely.geometry import box
from darts_acquisition import download_arcticdem

# Define area of interest
aoi = gpd.GeoDataFrame(
    geometry=[box(-50, 70, -49, 71)],
    crs="EPSG:4326"
)

# Download 2m resolution ArcticDEM
download_arcticdem(
    aoi=aoi,
    data_dir="/data/arcticdem_2m.icechunk",
    resolution=2
)
Source code in darts-acquisition/src/darts_acquisition/arcticdem.py
@stopwatch.f("Downloading ArcticDEM", printer=logger.debug, print_kwargs=["data_dir", "resolution"])
def download_arcticdem(
    aoi: gpd.GeoDataFrame,
    data_dir: Path | str,
    resolution: RESOLUTIONS,
) -> None:
    """Download ArcticDEM data for the specified area of interest.

    This function downloads ArcticDEM elevation tiles from AWS S3 for the given area
    of interest and stores them in a local icechunk data store for efficient access.

    Args:
        aoi (gpd.GeoDataFrame): Area of interest for which to download ArcticDEM data.
            Can be in any CRS; will be reprojected to EPSG:3413 (ArcticDEM's native CRS).
        data_dir (Path | str): Path to the icechunk data directory (must have .icechunk suffix).
            Must contain the resolution in the name (e.g., "arcticdem_2m.icechunk").
        resolution (Literal[2, 10, 32]): The resolution of the ArcticDEM data in meters.
            Must match the resolution indicated in the data_dir name.

    Note:
        This function automatically configures AWS access with unsigned requests to the
        public ArcticDEM S3 bucket. No AWS credentials are required.

    Example:
        Download ArcticDEM for a study area:

        ```python
        import geopandas as gpd
        from shapely.geometry import box
        from darts_acquisition import download_arcticdem

        # Define area of interest
        aoi = gpd.GeoDataFrame(
            geometry=[box(-50, 70, -49, 71)],
            crs="EPSG:4326"
        )

        # Download 2m resolution ArcticDEM
        download_arcticdem(
            aoi=aoi,
            data_dir="/data/arcticdem_2m.icechunk",
            resolution=2
        )
        ```

    """
    odc.stac.configure_rio(cloud_defaults=True, aws={"aws_unsigned": True})
    accessor = _validate_and_get_accessor(data_dir, resolution)
    accessor.download(aoi)

download_cdse_s2_sr_scene

download_cdse_s2_sr_scene(
    s2item: str | pystac.Item,
    store: pathlib.Path,
    bands_mapping: dict | typing.Literal["all"] = {
        "B02_10m": "blue",
        "B03_10m": "green",
        "B04_10m": "red",
        "B08_10m": "nir",
    },
    aws_profile_name: str = "default",
)

Download a Sentinel-2 scene from CDSE via STAC API and store it in the local data store.

This function downloads Sentinel-2 Level-2A surface reflectance data from the Copernicus Data Space Ecosystem (CDSE) and stores it locally in a compressed zarr store for efficient repeated access.

Parameters:

  • s2item (str | pystac.Item) –

    Sentinel-2 scene identifier (e.g., "S2A_MSIL2A_20230615T...") or a PySTAC Item object from a STAC search.

  • store (pathlib.Path) –

    Path to the local zarr store directory where the scene will be saved.

  • bands_mapping (dict | typing.Literal['all'], default: {'B02_10m': 'blue', 'B03_10m': 'green', 'B04_10m': 'red', 'B08_10m': 'nir'} ) –

    Mapping of Sentinel-2 band names to custom band names. Keys should be CDSE band names (e.g., "B02_10m", "B03_10m"), values are the desired output names. Use "all" to load all optical bands and SCL. Defaults to {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"}.

  • aws_profile_name (str, default: 'default' ) –

    AWS profile name for authentication with the Copernicus S3 bucket. Defaults to "default".

Note
  • Requires Copernicus Data Space authentication. Use darts_utils.copernicus.init_copernicus() to set up credentials before calling this function.
  • All bands are resampled to 10m resolution during download.
  • Data is stored with zstd compression for efficient storage.
  • The SCL (Scene Classification Layer) band is automatically included if not specified.
Example

Download Sentinel-2 scenes for a project:

from pathlib import Path
from darts_acquisition import download_cdse_s2_sr_scene
from darts_utils.copernicus import init_copernicus

# Setup authentication
init_copernicus(profile_name="default")

# Download scene with all bands
download_cdse_s2_sr_scene(
    s2item="S2A_MSIL2A_20230615T123456_N0509_R012_T33UUP_20230615T145678",
    store=Path("/data/s2_store"),
    bands_mapping="all",
    aws_profile_name="default"
)
Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch.f("Downloading Sentinel-2 scene from CDSE if missing", printer=logger.debug, print_kwargs=["s2item"])
def download_cdse_s2_sr_scene(
    s2item: str | Item,
    store: Path,
    bands_mapping: dict | Literal["all"] = {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"},
    aws_profile_name: str = "default",
):
    """Download a Sentinel-2 scene from CDSE via STAC API and store it in the local data store.

    This function downloads Sentinel-2 Level-2A surface reflectance data from the Copernicus
    Data Space Ecosystem (CDSE) and stores it locally in a compressed zarr store for efficient
    repeated access.

    Args:
        s2item (str | Item): Sentinel-2 scene identifier (e.g., "S2A_MSIL2A_20230615T...") or
            a PySTAC Item object from a STAC search.
        store (Path): Path to the local zarr store directory where the scene will be saved.
        bands_mapping (dict | Literal["all"], optional): Mapping of Sentinel-2 band names to
            custom band names. Keys should be CDSE band names (e.g., "B02_10m", "B03_10m"),
            values are the desired output names. Use "all" to load all optical bands and SCL.
            Defaults to {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"}.
        aws_profile_name (str, optional): AWS profile name for authentication with the
            Copernicus S3 bucket. Defaults to "default".

    Note:
        - Requires Copernicus Data Space authentication. Use `darts_utils.copernicus.init_copernicus()`
          to set up credentials before calling this function.
        - All bands are resampled to 10m resolution during download.
        - Data is stored with zstd compression for efficient storage.
        - The SCL (Scene Classification Layer) band is automatically included if not specified.

    Example:
        Download Sentinel-2 scenes for a project:

        ```python
        from pathlib import Path
        from darts_acquisition import download_cdse_s2_sr_scene
        from darts_utils.copernicus import init_copernicus

        # Setup authentication
        init_copernicus(profile_name="default")

        # Download scene with all bands
        download_cdse_s2_sr_scene(
            s2item="S2A_MSIL2A_20230615T123456_N0509_R012_T33UUP_20230615T145678",
            store=Path("/data/s2_store"),
            bands_mapping="all",
            aws_profile_name="default"
        )
        ```

    """
    bands_mapping = _get_band_mapping(bands_mapping)
    store_manager = CDSEStoreManager(
        store=store,
        bands_mapping=bands_mapping,
        aws_profile_name=aws_profile_name,
    )

    store_manager.download_and_store(s2item)

download_gee_s2_sr_scene

download_gee_s2_sr_scene(
    s2item: str | ee.Image,
    store: pathlib.Path,
    bands_mapping: dict | typing.Literal["all"] = {
        "B2": "blue",
        "B3": "green",
        "B4": "red",
        "B8": "nir",
    },
)

Download a Sentinel-2 scene from Google Earth Engine and store it in the local data store.

This function downloads Sentinel-2 Level-2A surface reflectance data from Google Earth Engine (GEE) and stores it locally in a compressed zarr store for efficient repeated access.

Parameters:

  • s2item (str | ee.Image) –

    Sentinel-2 scene identifier (e.g., "20230615T123456_20230615T123659_T33UUP") or an ee.Image object from the COPERNICUS/S2_SR collection.

  • store (pathlib.Path) –

    Path to the local zarr store directory where the scene will be saved.

  • bands_mapping (dict | typing.Literal['all'], default: {'B2': 'blue', 'B3': 'green', 'B4': 'red', 'B8': 'nir'} ) –

    Mapping of Sentinel-2 band names to custom band names. Keys should be GEE band names (e.g., "B2", "B3"), values are the desired output names. Use "all" to load all optical bands and SCL. Defaults to {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"}.

Note
  • Requires Google Earth Engine authentication. Use ee.Initialize() before calling.
  • All bands are downloaded at 10m resolution.
  • Data is stored with zstd compression for efficient storage.
  • The SCL (Scene Classification Layer) band is automatically included if not specified.
Example

Download Sentinel-2 scenes from GEE:

import ee
from pathlib import Path
from darts_acquisition import download_gee_s2_sr_scene

# Initialize Earth Engine
ee.Initialize()

# Download scene with all bands
download_gee_s2_sr_scene(
    s2item="20230615T123456_20230615T123659_T33UUP",
    store=Path("/data/s2_store"),
    bands_mapping="all"
)
Source code in darts-acquisition/src/darts_acquisition/s2/gee_scene.py
@stopwatch.f("Downloading Sentinel-2 scene from GEE if missing", printer=logger.debug, print_kwargs=["s2item"])
def download_gee_s2_sr_scene(
    s2item: str | ee.Image,
    store: Path,
    bands_mapping: dict | Literal["all"] = {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"},
):
    """Download a Sentinel-2 scene from Google Earth Engine and store it in the local data store.

    This function downloads Sentinel-2 Level-2A surface reflectance data from Google Earth
    Engine (GEE) and stores it locally in a compressed zarr store for efficient repeated access.

    Args:
        s2item (str | ee.Image): Sentinel-2 scene identifier (e.g., "20230615T123456_20230615T123659_T33UUP")
            or an ee.Image object from the COPERNICUS/S2_SR collection.
        store (Path): Path to the local zarr store directory where the scene will be saved.
        bands_mapping (dict | Literal["all"], optional): Mapping of Sentinel-2 band names to
            custom band names. Keys should be GEE band names (e.g., "B2", "B3"), values are
            the desired output names. Use "all" to load all optical bands and SCL.
            Defaults to {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"}.

    Note:
        - Requires Google Earth Engine authentication. Use `ee.Initialize()` before calling.
        - All bands are downloaded at 10m resolution.
        - Data is stored with zstd compression for efficient storage.
        - The SCL (Scene Classification Layer) band is automatically included if not specified.

    Example:
        Download Sentinel-2 scenes from GEE:

        ```python
        import ee
        from pathlib import Path
        from darts_acquisition import download_gee_s2_sr_scene

        # Initialize Earth Engine
        ee.Initialize()

        # Download scene with all bands
        download_gee_s2_sr_scene(
            s2item="20230615T123456_20230615T123659_T33UUP",
            store=Path("/data/s2_store"),
            bands_mapping="all"
        )
        ```

    """
    bands_mapping = _get_band_mapping(bands_mapping)
    store_manager = GEEStoreManager(
        store=store,
        bands_mapping=bands_mapping,
    )

    store_manager.download_and_store(s2item)

download_sentinel_2_grid

download_sentinel_2_grid(grid_dir: pathlib.Path)

Download the Sentinel 2 grid files.

Files will be stored under [grid_dir]/adm1.shp and [grid_dir]/...

Parameters:

Source code in darts-acquisition/src/darts_acquisition/s2/grid.py
@stopwatch.f("Downloading Sentinel 2 grid", printer=logger.debug)
def download_sentinel_2_grid(grid_dir: Path):
    """Download the Sentinel 2 grid files.

    Files will be stored under [grid_dir]/adm1.shp and [grid_dir]/...

    Args:
        grid_dir (Path): The path to the grid.

    """
    grid_dir.mkdir(exist_ok=True, parents=True)
    grid_url = "https://github.com/justinelliotmeyers/Sentinel-2-Shapefile-Index/archive/refs/heads/master.zip"
    logger.debug(f"Downloading {grid_url} to {grid_dir.resolve()}")
    _download_zip(grid_url, grid_dir)

download_tcvis

download_tcvis(
    aoi: geopandas.GeoDataFrame,
    data_dir: pathlib.Path | str,
) -> None

Download TCVIS (Tasseled Cap trends) data for the specified area of interest.

This function downloads Tasseled Cap trend data from Google Earth Engine for the given area of interest and stores it in a local icechunk data store for efficient access.

Parameters:

  • aoi (geopandas.GeoDataFrame) –

    Area of interest for which to download TCVIS data. Can be in any CRS; will be reprojected to the TCVIS dataset's native CRS.

  • data_dir (pathlib.Path | str) –

    Path to the icechunk data directory (must have .icechunk suffix).

Note

Requires Google Earth Engine authentication to be set up before calling this function. Use ee.Initialize() or ee.Authenticate() as needed.

Example

Download TCVIS for a study area:

import geopandas as gpd
from shapely.geometry import box
from darts_acquisition import download_tcvis

# Define area of interest
aoi = gpd.GeoDataFrame(
    geometry=[box(-50, 70, -49, 71)],
    crs="EPSG:4326"
)

# Download TCVIS
download_tcvis(
    aoi=aoi,
    data_dir="/data/tcvis.icechunk"
)
Source code in darts-acquisition/src/darts_acquisition/tcvis.py
@stopwatch.f("Downloading TCVIS", printer=logger.debug, print_kwargs=["data_dir"])
def download_tcvis(
    aoi: gpd.GeoDataFrame,
    data_dir: Path | str,
) -> None:
    """Download TCVIS (Tasseled Cap trends) data for the specified area of interest.

    This function downloads Tasseled Cap trend data from Google Earth Engine for the given
    area of interest and stores it in a local icechunk data store for efficient access.

    Args:
        aoi (gpd.GeoDataFrame): Area of interest for which to download TCVIS data.
            Can be in any CRS; will be reprojected to the TCVIS dataset's native CRS.
        data_dir (Path | str): Path to the icechunk data directory (must have .icechunk suffix).

    Note:
        Requires Google Earth Engine authentication to be set up before calling this function.
        Use `ee.Initialize()` or `ee.Authenticate()` as needed.

    Example:
        Download TCVIS for a study area:

        ```python
        import geopandas as gpd
        from shapely.geometry import box
        from darts_acquisition import download_tcvis

        # Define area of interest
        aoi = gpd.GeoDataFrame(
            geometry=[box(-50, 70, -49, 71)],
            crs="EPSG:4326"
        )

        # Download TCVIS
        download_tcvis(
            aoi=aoi,
            data_dir="/data/tcvis.icechunk"
        )
        ```

    """
    assert ".icechunk" == data_dir.suffix, f"Data directory {data_dir} must have an .icechunk suffix!"
    accessor = smart_geocubes.TCTrend(data_dir, create_icechunk_storage=False)
    accessor.assert_created()
    accessor.download(aoi)

get_aoi_from_cdse_scene_ids

get_aoi_from_cdse_scene_ids(
    scene_ids: list[str],
) -> geopandas.GeoDataFrame

Get the area of interest (AOI) as a GeoDataFrame from a list of Sentinel-2 scene IDs.

Parameters:

  • scene_ids (list[str]) –

    List of Sentinel-2 scene IDs.

Returns:

  • geopandas.GeoDataFrame

    gpd.GeoDataFrame: The AOI as a GeoDataFrame.

Raises:

  • ValueError

    If no Sentinel-2 items are found for the given scene IDs.

Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch("Getting AOI from CDSE scene IDs", printer=logger.debug)
def get_aoi_from_cdse_scene_ids(
    scene_ids: list[str],
) -> gpd.GeoDataFrame:
    """Get the area of interest (AOI) as a GeoDataFrame from a list of Sentinel-2 scene IDs.

    Args:
        scene_ids (list[str]): List of Sentinel-2 scene IDs.

    Returns:
        gpd.GeoDataFrame: The AOI as a GeoDataFrame.

    Raises:
        ValueError: If no Sentinel-2 items are found for the given scene IDs.

    """
    catalog = Client.open("https://stac.dataspace.copernicus.eu/v1/")
    search = catalog.search(
        collections=["sentinel-2-l2a"],
        ids=scene_ids,
    )
    items = list(search.items())
    if not items:
        raise ValueError("No Sentinel-2 items found for the given scene IDs.")
    gdf = gpd.GeoDataFrame.from_features(
        [item.to_dict() for item in items],
        crs="EPSG:4326",
    )
    return gdf

get_aoi_from_gee_scene_ids

get_aoi_from_gee_scene_ids(
    scene_ids: list[str],
) -> geopandas.GeoDataFrame

Get the area of interest (AOI) as a GeoDataFrame from a list of Sentinel-2 scene IDs.

Parameters:

  • scene_ids (list[str]) –

    List of Sentinel-2 scene IDs.

Returns:

  • geopandas.GeoDataFrame

    gpd.GeoDataFrame: The AOI as a GeoDataFrame.

Raises:

  • ValueError

    If no Sentinel-2 items are found for the given scene IDs.

Source code in darts-acquisition/src/darts_acquisition/s2/gee_scene.py
def get_aoi_from_gee_scene_ids(
    scene_ids: list[str],
) -> gpd.GeoDataFrame:
    """Get the area of interest (AOI) as a GeoDataFrame from a list of Sentinel-2 scene IDs.

    Args:
        scene_ids (list[str]): List of Sentinel-2 scene IDs.

    Returns:
        gpd.GeoDataFrame: The AOI as a GeoDataFrame.

    Raises:
        ValueError: If no Sentinel-2 items are found for the given scene IDs.

    """
    geoms = []
    for s2id in scene_ids:
        s2item = ee.Image(f"COPERNICUS/S2_SR/{s2id}")
        geom = s2item.geometry().getInfo()
        geoms.append(geom)

    if not geoms:
        raise ValueError("No Sentinel-2 items found for the given scene IDs.")

    features = [{"type": "Feature", "geometry": geom, "properties": {}} for geom in geoms]
    feature_collection = {"type": "FeatureCollection", "features": features}
    gdf = gpd.GeoDataFrame.from_features(feature_collection, crs="EPSG:4326")
    return gdf

get_cdse_s2_sr_scene_ids_from_geodataframe

get_cdse_s2_sr_scene_ids_from_geodataframe(
    aoi: geopandas.GeoDataFrame | pathlib.Path | str,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
    simplify_geometry: float
    | typing.Literal[False] = False,
) -> dict[str, pystac.Item]

Search for Sentinel-2 scenes via STAC based on an area of interest (aoi).

Parameters:

  • aoi (geopandas.GeoDataFrame | pathlib.Path | str) –

    AOI as a GeoDataFrame or path to a shapefile. If a path is provided, it will be read using geopandas.

  • start_date (str, default: None ) –

    Starting date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • end_date (str, default: None ) –

    Ending date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • max_cloud_cover (int, default: 10 ) –

    Maximum percentage of cloud cover. Defaults to 10.

  • max_snow_cover (int, default: 10 ) –

    Maximum percentage of snow cover. Defaults to 10.

  • months (list[int] | None, default: None ) –

    List of months (1-12) to filter the search. Only used if start_date and end_date are None. Defaults to None.

  • years (list[int] | None, default: None ) –

    List of years to filter the search. Only used if start_date and end_date are None. Defaults to None.

  • simplify_geometry (float | typing.Literal[False], default: False ) –

    If a float is provided, the geometry will be simplified using the simplify method of geopandas. If False, no simplification will be done. This may become useful for large / weird AOIs which are too large for the STAC API. Defaults to False.

Returns:

  • dict[str, pystac.Item]

    dict[str, Item]: A dictionary of found Sentinel-2 items.

Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch("Searching for Sentinel-2 scenes in CDSE from AOI", printer=logger.debug)
def get_cdse_s2_sr_scene_ids_from_geodataframe(
    aoi: gpd.GeoDataFrame | Path | str,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
    simplify_geometry: float | Literal[False] = False,
) -> dict[str, Item]:
    """Search for Sentinel-2 scenes via STAC based on an area of interest (aoi).

    Args:
        aoi (gpd.GeoDataFrame | Path | str): AOI as a GeoDataFrame or path to a shapefile.
            If a path is provided, it will be read using geopandas.
        start_date (str): Starting date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        end_date (str): Ending date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        max_cloud_cover (int, optional): Maximum percentage of cloud cover. Defaults to 10.
        max_snow_cover (int, optional): Maximum percentage of snow cover. Defaults to 10.
        months (list[int] | None, optional): List of months (1-12) to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.
        years (list[int] | None, optional): List of years to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.
        simplify_geometry (float | Literal[False], optional): If a float is provided, the geometry will be simplified
            using the `simplify` method of geopandas. If False, no simplification will be done.
            This may become useful for large / weird AOIs which are too large for the STAC API.
            Defaults to False.

    Returns:
        dict[str, Item]: A dictionary of found Sentinel-2 items.

    """
    if isinstance(aoi, Path | str):
        aoi = gpd.read_file(aoi)
    s2items: dict[str, Item] = {}
    if simplify_geometry:
        aoi = aoi.copy()
        aoi["geometry"] = aoi.geometry.simplify(simplify_geometry)
    for i, row in aoi.iterrows():
        s2items.update(
            search_cdse_s2_sr(
                intersects=row.geometry,
                start_date=start_date,
                end_date=end_date,
                max_cloud_cover=max_cloud_cover,
                max_snow_cover=max_snow_cover,
                months=months,
                years=years,
            )
        )
    return s2items

get_cdse_s2_sr_scene_ids_from_tile_ids

get_cdse_s2_sr_scene_ids_from_tile_ids(
    tile_ids: list[str],
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
) -> dict[str, pystac.Item]

Search for Sentinel-2 scenes via STAC based on a list of tile IDs.

Parameters:

  • tile_ids (list[str]) –

    List of MGRS tile IDs to search for.

  • start_date (str, default: None ) –

    Starting date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • end_date (str, default: None ) –

    Ending date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • max_cloud_cover (int, default: 10 ) –

    Maximum percentage of cloud cover. Defaults to 10.

  • max_snow_cover (int, default: 10 ) –

    Maximum percentage of snow cover. Defaults to 10.

  • months (list[int] | None, default: None ) –

    List of months (1-12) to filter the search. Only used if start_date and end_date are None. Defaults to None.

  • years (list[int] | None, default: None ) –

    List of years to filter the search. Only used if start_date and end_date are None. Defaults to None.

Returns:

  • dict[str, pystac.Item]

    dict[str, Item]: A dictionary of found Sentinel-2 items.

Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch("Searching for Sentinel-2 scenes in CDSE from Tile-IDs", printer=logger.debug)
def get_cdse_s2_sr_scene_ids_from_tile_ids(
    tile_ids: list[str],
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
) -> dict[str, Item]:
    """Search for Sentinel-2 scenes via STAC based on a list of tile IDs.

    Args:
        tile_ids (list[str]): List of MGRS tile IDs to search for.
        start_date (str): Starting date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        end_date (str): Ending date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        max_cloud_cover (int, optional): Maximum percentage of cloud cover. Defaults to 10.
        max_snow_cover (int, optional): Maximum percentage of snow cover. Defaults to 10.
        months (list[int] | None, optional): List of months (1-12) to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.
        years (list[int] | None, optional): List of years to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.

    Returns:
        dict[str, Item]: A dictionary of found Sentinel-2 items.

    """
    return search_cdse_s2_sr(
        tiles=tile_ids,
        start_date=start_date,
        end_date=end_date,
        max_cloud_cover=max_cloud_cover,
        max_snow_cover=max_snow_cover,
        months=months,
        years=years,
    )

get_gee_s2_sr_scene_ids_from_geodataframe

get_gee_s2_sr_scene_ids_from_geodataframe(
    aoi: geopandas.GeoDataFrame | pathlib.Path | str,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
) -> set[str]

Search for Sentinel-2 scenes via Earth Engine based on an aoi shapefile.

Parameters:

  • aoi (geopandas.GeoDataFrame | pathlib.Path | str) –

    AOI as a GeoDataFrame or path to a shapefile. If a path is provided, it will be read using geopandas.

  • start_date (str, default: None ) –

    Starting date in a format readable by ee. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • end_date (str, default: None ) –

    Ending date in a format readable by ee. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • max_cloud_cover (int, default: 10 ) –

    Maximum percentage of cloud cover. Defaults to 10.

  • max_snow_cover (int, default: 10 ) –

    Maximum percentage of snow cover. Defaults to 10.

Returns:

  • set[str]

    set[str]: Unique Sentinel-2 tile IDs.

Source code in darts-acquisition/src/darts_acquisition/s2/gee_scene.py
@stopwatch("Searching for Sentinel-2 scenes in Earth Engine from AOI", printer=logger.debug)
def get_gee_s2_sr_scene_ids_from_geodataframe(
    aoi: gpd.GeoDataFrame | Path | str,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
) -> set[str]:
    """Search for Sentinel-2 scenes via Earth Engine based on an aoi shapefile.

    Args:
        aoi (gpd.GeoDataFrame | Path | str): AOI as a GeoDataFrame or path to a shapefile.
            If a path is provided, it will be read using geopandas.
        start_date (str): Starting date in a format readable by ee.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        end_date (str): Ending date in a format readable by ee.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        max_cloud_cover (int, optional): Maximum percentage of cloud cover. Defaults to 10.
        max_snow_cover (int, optional): Maximum percentage of snow cover. Defaults to 10.

    Returns:
        set[str]: Unique Sentinel-2 tile IDs.

    """
    # Disable max xxx cover if set to 100
    if max_cloud_cover is not None and max_cloud_cover == 100:
        max_cloud_cover = None
    if max_snow_cover is not None and max_snow_cover == 100:
        max_snow_cover = None

    if isinstance(aoi, Path | str):
        aoi = gpd.read_file(aoi)
    aoi = aoi.to_crs("EPSG:4326")
    s2ids = set()
    for i, row in aoi.iterrows():
        geom = ee.Geometry.Polygon(list(row.geometry.exterior.coords))
        if start_date is not None and end_date is not None:
            ic = ee.ImageCollection("COPERNICUS/S2_SR").filterBounds(geom).filterDate(start_date, end_date)
            if max_cloud_cover:
                ic = ic.filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", max_cloud_cover)
            if max_snow_cover:
                ic = ic.filterMetadata("SNOW_SNOW_ICE_PERCENTAGE", "less_than", max_snow_cover)
            s2ids.update(ic.aggregate_array("system:index").getInfo())
        else:
            logger.warning("No valid date filtering provided. This may result in a too large number of scenes for GEE.")
            ic = ee.ImageCollection("COPERNICUS/S2_SR").filterBounds(geom)
            if max_cloud_cover:
                ic = ic.filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", max_cloud_cover)
            if max_snow_cover:
                ic = ic.filterMetadata("SNOW_SNOW_ICE_PERCENTAGE", "less_than", max_snow_cover)
            s2ids.update(ic.aggregate_array("system:index").getInfo())

    logger.debug(f"Found {len(s2ids)} Sentinel-2 tiles via ee.")
    return s2ids

get_gee_s2_sr_scene_ids_from_tile_ids

get_gee_s2_sr_scene_ids_from_tile_ids(
    tiles: list[str],
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
) -> set[str]

Search for Sentinel-2 scenes via Earth Engine based on a list of tile IDs.

Parameters:

  • tiles (list[str]) –

    List of Sentinel-2 tile IDs.

  • start_date (str, default: None ) –

    Starting date in a format readable by ee. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • end_date (str, default: None ) –

    Ending date in a format readable by ee. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • max_cloud_cover (int, default: 10 ) –

    Maximum percentage of cloud cover. Defaults to 10.

  • max_snow_cover (int, default: 10 ) –

    Maximum percentage of snow cover. Defaults to 10.

Returns:

  • set[str]

    set[str]: Unique Sentinel-2 tile IDs.

Source code in darts-acquisition/src/darts_acquisition/s2/gee_scene.py
def get_gee_s2_sr_scene_ids_from_tile_ids(
    tiles: list[str],
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
) -> set[str]:
    """Search for Sentinel-2 scenes via Earth Engine based on a list of tile IDs.

    Args:
        tiles (list[str]): List of Sentinel-2 tile IDs.
        start_date (str): Starting date in a format readable by ee.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        end_date (str): Ending date in a format readable by ee.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        max_cloud_cover (int, optional): Maximum percentage of cloud cover. Defaults to 10.
        max_snow_cover (int, optional): Maximum percentage of snow cover. Defaults to 10.

    Returns:
        set[str]: Unique Sentinel-2 tile IDs.

    """
    # Disable max xxx cover if set to 100
    if max_cloud_cover is not None and max_cloud_cover == 100:
        max_cloud_cover = None
    if max_snow_cover is not None and max_snow_cover == 100:
        max_snow_cover = None

    s2ids = set()
    for tile in tiles:
        if start_date is not None and end_date is not None:
            ic = (
                ee.ImageCollection("COPERNICUS/S2_SR")
                .filterDate(start_date, end_date)
                .filterMetadata("MGRS_TILE", "equals", tile)
            )
            if max_cloud_cover:
                ic = ic.filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", max_cloud_cover)
            if max_snow_cover:
                ic = ic.filterMetadata("SNOW_SNOW_ICE_PERCENTAGE", "less_than", max_snow_cover)
            s2ids.update(ic.aggregate_array("system:index").getInfo())
        else:
            logger.warning("No valid date filtering provided. This may result in a too large number of scenes for GEE.")
            ic = ee.ImageCollection("COPERNICUS/S2_SR").filterMetadata("MGRS_TILE", "equals", tile)
            if max_cloud_cover:
                ic = ic.filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", max_cloud_cover)
            if max_snow_cover:
                ic = ic.filterMetadata("SNOW_SNOW_ICE_PERCENTAGE", "less_than", max_snow_cover)
            s2ids.update(ic.aggregate_array("system:index").getInfo())

    logger.debug(f"Found {len(s2ids)} Sentinel-2 tiles via ee.")
    return s2ids

get_planet_geometry

get_planet_geometry(
    fpath: str | pathlib.Path,
) -> odc.geo.Geometry

Get the geometry of a Planet scene.

Parameters:

  • fpath (str | pathlib.Path) –

    The file path to the Planet scene from which to derive the geometry.

Returns:

  • odc.geo.Geometry

    odc.geo.Geometry: The geometry of the Planet scene.

Raises:

Source code in darts-acquisition/src/darts_acquisition/planet.py
def get_planet_geometry(fpath: str | Path) -> odc.geo.Geometry:
    """Get the geometry of a Planet scene.

    Args:
        fpath (str | Path): The file path to the Planet scene from which to derive the geometry.

    Returns:
        odc.geo.Geometry: The geometry of the Planet scene.

    Raises:
        FileNotFoundError: If no matching TIFF file is found in the specified path.

    """
    # Convert to Path object if a string is provided
    fpath = fpath if isinstance(fpath, Path) else Path(fpath)
    # Get imagepath
    ps_image = next(fpath.glob("*_SR.tif"), None)
    if not ps_image:
        ps_image = next(fpath.glob("*_SR_clip.tif"), None)
    if not ps_image:
        raise FileNotFoundError(f"No matching TIFF files found in {fpath.resolve()} (.glob('*_SR.tif'))")

    planet_raster = rasterio.open(ps_image)
    return odc.geo.Geometry(planet_raster.bounds, crs=planet_raster.crs)

load_arcticdem

load_arcticdem(
    geobox: odc.geo.geobox.GeoBox,
    data_dir: pathlib.Path | str,
    resolution: darts_acquisition.arcticdem.RESOLUTIONS,
    buffer: int = 0,
    offline: bool = False,
) -> xarray.Dataset

Load the ArcticDEM for the given geobox, fetch new data from the STAC server if necessary.

This function loads ArcticDEM elevation data from a local icechunk store. If offline=False, missing data will be automatically downloaded from the AWS-hosted STAC server and stored locally for future use. The loaded data is returned in the ArcticDEM's native CRS (EPSG:3413).

Parameters:

  • geobox (odc.geo.geobox.GeoBox) –

    The geobox for which the tile should be loaded. Must be in a meter-based CRS.

  • data_dir (pathlib.Path | str) –

    Path to the icechunk data directory (must have .icechunk suffix). This directory stores downloaded ArcticDEM data for faster consecutive access.

  • resolution (typing.Literal[2, 10, 32]) –

    The resolution of the ArcticDEM data in meters. Must match the resolution indicated in the data_dir name (e.g., "arcticdem_2m.icechunk").

  • buffer (int, default: 0 ) –

    Buffer around the geobox in pixels. The buffer is applied in the ArcticDEM's native CRS (EPSG:3413) after reprojecting the input geobox. Useful for edge effect removal in terrain analysis. Defaults to 0.

  • offline (bool, default: False ) –

    If True, only loads data already present in the local store without attempting any downloads. If False, missing data is downloaded from AWS. Defaults to False.

Returns:

  • xarray.Dataset

    xr.Dataset: The ArcticDEM dataset with the following data variables: - dem (float32): Elevation values in meters, clipped to [-100, 3000] range - arcticdem_data_mask (uint8): Data validity mask (1=valid, 0=invalid)

    The dataset is in the ArcticDEM's native CRS (EPSG:3413) with the buffer applied. It is NOT automatically reprojected to match the input geobox's CRS and resolution.

Note

The offline parameter controls data fetching behavior:

  • When offline=False: Uses smart_geocubes accessor's load() method which automatically downloads missing tiles from AWS and persists them to the icechunk store.
  • When offline=True: Uses the accessor's open_xarray() method to open the existing store and crops it to the requested region. Raises an error if data is missing.
Warning
  • The input geobox must be in a meter-based CRS.
  • The data_dir must have an .icechunk suffix and contain the resolution in the name.
  • The returned dataset is in EPSG:3413, not the input geobox's CRS.
Example

Load ArcticDEM with a buffer for terrain analysis:

from math import ceil, sqrt
from darts_acquisition import load_arcticdem

# Assume "optical" is a loaded Sentinel-2 dataset
arcticdem = load_arcticdem(
    geobox=optical.odc.geobox,
    data_dir="/data/arcticdem_2m.icechunk",
    resolution=2,
    buffer=ceil(128 / 2 * sqrt(2)),  # Buffer for TPI with 128m radius
    offline=False
)

# Reproject to match optical data's CRS and resolution
arcticdem = arcticdem.odc.reproject(optical.odc.geobox, resampling="cubic")
Source code in darts-acquisition/src/darts_acquisition/arcticdem.py
@stopwatch.f("Loading ArcticDEM", printer=logger.debug, print_kwargs=["data_dir", "resolution", "buffer", "offline"])
def load_arcticdem(
    geobox: GeoBox,
    data_dir: Path | str,
    resolution: RESOLUTIONS,
    buffer: int = 0,
    offline: bool = False,
) -> xr.Dataset:
    """Load the ArcticDEM for the given geobox, fetch new data from the STAC server if necessary.

    This function loads ArcticDEM elevation data from a local icechunk store. If `offline=False`,
    missing data will be automatically downloaded from the AWS-hosted STAC server and stored
    locally for future use. The loaded data is returned in the ArcticDEM's native CRS (EPSG:3413).

    Args:
        geobox (GeoBox): The geobox for which the tile should be loaded. Must be in a meter-based CRS.
        data_dir (Path | str): Path to the icechunk data directory (must have .icechunk suffix).
            This directory stores downloaded ArcticDEM data for faster consecutive access.
        resolution (Literal[2, 10, 32]): The resolution of the ArcticDEM data in meters.
            Must match the resolution indicated in the data_dir name (e.g., "arcticdem_2m.icechunk").
        buffer (int, optional): Buffer around the geobox in pixels. The buffer is applied in the
            ArcticDEM's native CRS (EPSG:3413) after reprojecting the input geobox. Useful for
            edge effect removal in terrain analysis. Defaults to 0.
        offline (bool, optional): If True, only loads data already present in the local store
            without attempting any downloads. If False, missing data is downloaded from AWS.
            Defaults to False.

    Returns:
        xr.Dataset: The ArcticDEM dataset with the following data variables:
            - dem (float32): Elevation values in meters, clipped to [-100, 3000] range
            - arcticdem_data_mask (uint8): Data validity mask (1=valid, 0=invalid)

            The dataset is in the ArcticDEM's native CRS (EPSG:3413) with the buffer applied.
            It is NOT automatically reprojected to match the input geobox's CRS and resolution.

    Note:
        The `offline` parameter controls data fetching behavior:

        - When `offline=False`: Uses `smart_geocubes` accessor's `load()` method which automatically
          downloads missing tiles from AWS and persists them to the icechunk store.
        - When `offline=True`: Uses the accessor's `open_xarray()` method to open the existing store
          and crops it to the requested region. Raises an error if data is missing.

    Warning:
        - The input geobox must be in a meter-based CRS.
        - The data_dir must have an `.icechunk` suffix and contain the resolution in the name.
        - The returned dataset is in EPSG:3413, not the input geobox's CRS.

    Example:
        Load ArcticDEM with a buffer for terrain analysis:

        ```python
        from math import ceil, sqrt
        from darts_acquisition import load_arcticdem

        # Assume "optical" is a loaded Sentinel-2 dataset
        arcticdem = load_arcticdem(
            geobox=optical.odc.geobox,
            data_dir="/data/arcticdem_2m.icechunk",
            resolution=2,
            buffer=ceil(128 / 2 * sqrt(2)),  # Buffer for TPI with 128m radius
            offline=False
        )

        # Reproject to match optical data's CRS and resolution
        arcticdem = arcticdem.odc.reproject(optical.odc.geobox, resampling="cubic")
        ```

    """
    if not offline:
        odc.stac.configure_rio(cloud_defaults=True, aws={"aws_unsigned": True})

    accessor = _validate_and_get_accessor(data_dir, resolution)

    if not offline:
        arcticdem = accessor.load(geobox, buffer=buffer, persist=True)
    else:
        xrcube = accessor.open_xarray()
        reference_geobox = geobox.to_crs(accessor.extent.crs, resolution=accessor.extent.resolution.x).pad(buffer)
        arcticdem = xrcube.odc.crop(reference_geobox.extent, apply_mask=False)
        arcticdem = arcticdem.load()

    # Change dtype of the datamask to uint8 for later reproject_match
    arcticdem["arcticdem_data_mask"] = arcticdem.datamask.astype("uint8")

    # Clip values to -100, 3000 range (see docs about bands)
    arcticdem["dem"] = arcticdem["dem"].clip(-100, 3000)

    # Change dtype of arcticdem to float32 to save memory (original is float64)
    arcticdem["dem"] = arcticdem["dem"].astype("float32")

    return arcticdem

load_cdse_s2_sr_scene

load_cdse_s2_sr_scene(
    s2item: str | pystac.Item,
    bands_mapping: dict | typing.Literal["all"] = {
        "B02_10m": "blue",
        "B03_10m": "green",
        "B04_10m": "red",
        "B08_10m": "nir",
    },
    store: pathlib.Path | None = None,
    aws_profile_name: str = "default",
    offline: bool = False,
    output_dir_for_debug_geotiff: pathlib.Path
    | None = None,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_utils.cuda.DEFAULT_DEVICE,
) -> xarray.Dataset

Load a Sentinel-2 scene from CDSE, downloading from STAC API if necessary.

This function loads Sentinel-2 Level-2A surface reflectance data from the Copernicus Data Space Ecosystem (CDSE). If a local store is provided, the data is cached for efficient repeated access. The function handles quality masking, reflectance scaling, and optional GPU acceleration.

The download logic is basically as follows:

IF flag:raw-data-store THEN
    IF exist_local THEN
        open -> memory
    ELIF online THEN
        download -> memory
        save
    ELIF offline THEN
        RAISE ERROR
    ENDIF
ELIF online THEN
    download -> memory
ELIF offline THEN
    RAISE ERROR
ENDIF

Parameters:

  • s2item (str | pystac.Item) –

    Sentinel-2 scene identifier or PySTAC Item object.

  • bands_mapping (dict | typing.Literal['all'], default: {'B02_10m': 'blue', 'B03_10m': 'green', 'B04_10m': 'red', 'B08_10m': 'nir'} ) –

    Mapping of Sentinel-2 band names to custom band names. Keys should be CDSE band names (e.g., "B02_10m"), values are output names. Use "all" to load all optical bands and SCL. Defaults to {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"}.

  • store (pathlib.Path | None, default: None ) –

    Path to local zarr store for caching. If None, data is loaded directly without caching. Defaults to None.

  • aws_profile_name (str, default: 'default' ) –

    AWS profile name for Copernicus S3 authentication. Defaults to "default".

  • offline (bool, default: False ) –

    If True, only loads from local store without downloading. Requires store to be provided. If False, missing data is downloaded. Defaults to False.

  • output_dir_for_debug_geotiff (pathlib.Path | None, default: None ) –

    If provided, writes raw data as GeoTIFF files for debugging. Defaults to None.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_utils.cuda.DEFAULT_DEVICE ) –

    Device for processing (GPU or CPU). Defaults to DEFAULT_DEVICE.

Returns:

  • xarray.Dataset

    xr.Dataset: Sentinel-2 dataset with the following data variables based on bands_mapping: - Optical bands (float32): Surface reflectance values [~-0.1 to ~1.0] Default bands: blue, green, red, nir Additional bands available: coastal, rededge071, rededge075, rededge078, nir08, nir09, swir16, swir22 Each has attributes: - long_name: "Sentinel-2 {Band}" - units: "Reflectance" - data_source: "Sentinel-2 L2A via Copernicus STAC API (sentinel-2-l2a)" - s2_scl (uint8): Scene Classification Layer Attributes: long_name, description of class values (0=NO_DATA, 1=SATURATED, etc.) - quality_data_mask (uint8): Derived quality mask - 0 = Invalid (no data, saturated, or defective) - 1 = Low quality (shadows, clouds, cirrus, snow/ice, water) - 2 = High quality (clear vegetation or non-vegetated land) - valid_data_mask (uint8): Binary validity mask (1=valid, 0=invalid)

    Dataset attributes: - azimuth (float): Solar azimuth angle from view:azimuth - elevation (float): Solar elevation angle from view:sun_elevation - s2_tile_id (str): Scene identifier - tile_id (str): Scene identifier (same as s2_tile_id) - Plus additional STAC metadata fields

Note

The offline parameter controls data fetching: - When offline=False: Automatically downloads missing data from CDSE and stores it in the local zarr store (if store is provided). - When offline=True: Only reads from the local store. Raises an error if data is missing or if store is None.

Reflectance processing: - Raw DN values are scaled: (DN / 10000.0) - 0.1 - Pixels where SCL==0 or DN==0 are masked as NaN - This matches the data format from GEE and Planet loaders

Quality mask derivation from SCL: - Invalid (0): NO_DATA, SATURATED_OR_DEFECTIVE - Low quality (1): CAST_SHADOWS, CLOUD_SHADOWS, CLOUD_*, THIN_CIRRUS, SNOW/ICE, WATER - High quality (2): VEGETATION, NOT_VEGETATED

Example

Load scene with local caching:

from pathlib import Path
from darts_acquisition import load_cdse_s2_sr_scene
from darts_utils.copernicus import init_copernicus

# Setup authentication
init_copernicus(profile_name="default")

# Load with caching
s2_ds = load_cdse_s2_sr_scene(
    s2item="S2A_MSIL2A_20230615T123456_N0509_R012_T33UUP_20230615T145678",
    bands_mapping="all",
    store=Path("/data/s2_store"),
    offline=False  # Download if not cached
)

# Compute NDVI
ndvi = (s2_ds.nir - s2_ds.red) / (s2_ds.nir + s2_ds.red)

# Filter to high quality pixels
s2_filtered = s2_ds.where(s2_ds.quality_data_mask == 2)
Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch.f("Loading Sentinel-2 scene from CDSE", printer=logger.debug, print_kwargs=["s2item"])
def load_cdse_s2_sr_scene(
    s2item: str | Item,
    bands_mapping: dict | Literal["all"] = {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"},
    store: Path | None = None,
    aws_profile_name: str = "default",
    offline: bool = False,
    output_dir_for_debug_geotiff: Path | None = None,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Load a Sentinel-2 scene from CDSE, downloading from STAC API if necessary.

    This function loads Sentinel-2 Level-2A surface reflectance data from the Copernicus
    Data Space Ecosystem (CDSE). If a local store is provided, the data is cached for
    efficient repeated access. The function handles quality masking, reflectance scaling,
    and optional GPU acceleration.

    The download logic is basically as follows:

    ```
    IF flag:raw-data-store THEN
        IF exist_local THEN
            open -> memory
        ELIF online THEN
            download -> memory
            save
        ELIF offline THEN
            RAISE ERROR
        ENDIF
    ELIF online THEN
        download -> memory
    ELIF offline THEN
        RAISE ERROR
    ENDIF
    ```

    Args:
        s2item (str | Item): Sentinel-2 scene identifier or PySTAC Item object.
        bands_mapping (dict | Literal["all"], optional): Mapping of Sentinel-2 band names to
            custom band names. Keys should be CDSE band names (e.g., "B02_10m"), values are
            output names. Use "all" to load all optical bands and SCL.
            Defaults to {"B02_10m": "blue", "B03_10m": "green", "B04_10m": "red", "B08_10m": "nir"}.
        store (Path | None, optional): Path to local zarr store for caching. If None, data is
            loaded directly without caching. Defaults to None.
        aws_profile_name (str, optional): AWS profile name for Copernicus S3 authentication.
            Defaults to "default".
        offline (bool, optional): If True, only loads from local store without downloading.
            Requires `store` to be provided. If False, missing data is downloaded.
            Defaults to False.
        output_dir_for_debug_geotiff (Path | None, optional): If provided, writes raw data as
            GeoTIFF files for debugging. Defaults to None.
        device (Literal["cuda", "cpu"] | int, optional): Device for processing (GPU or CPU).
            Defaults to DEFAULT_DEVICE.

    Returns:
        xr.Dataset: Sentinel-2 dataset with the following data variables based on bands_mapping:
            - Optical bands (float32): Surface reflectance values [~-0.1 to ~1.0]
              Default bands: blue, green, red, nir
              Additional bands available: coastal, rededge071, rededge075, rededge078,
              nir08, nir09, swir16, swir22
              Each has attributes:
              - long_name: "Sentinel-2 {Band}"
              - units: "Reflectance"
              - data_source: "Sentinel-2 L2A via Copernicus STAC API (sentinel-2-l2a)"
            - s2_scl (uint8): Scene Classification Layer
              Attributes: long_name, description of class values (0=NO_DATA, 1=SATURATED, etc.)
            - quality_data_mask (uint8): Derived quality mask
              - 0 = Invalid (no data, saturated, or defective)
              - 1 = Low quality (shadows, clouds, cirrus, snow/ice, water)
              - 2 = High quality (clear vegetation or non-vegetated land)
            - valid_data_mask (uint8): Binary validity mask (1=valid, 0=invalid)

            Dataset attributes:
            - azimuth (float): Solar azimuth angle from view:azimuth
            - elevation (float): Solar elevation angle from view:sun_elevation
            - s2_tile_id (str): Scene identifier
            - tile_id (str): Scene identifier (same as s2_tile_id)
            - Plus additional STAC metadata fields

    Note:
        The `offline` parameter controls data fetching:
        - When `offline=False`: Automatically downloads missing data from CDSE and stores it
          in the local zarr store (if store is provided).
        - When `offline=True`: Only reads from the local store. Raises an error if data is
          missing or if store is None.

        Reflectance processing:
        - Raw DN values are scaled: (DN / 10000.0) - 0.1
        - Pixels where SCL==0 or DN==0 are masked as NaN
        - This matches the data format from GEE and Planet loaders

        Quality mask derivation from SCL:
        - Invalid (0): NO_DATA, SATURATED_OR_DEFECTIVE
        - Low quality (1): CAST_SHADOWS, CLOUD_SHADOWS, CLOUD_*, THIN_CIRRUS, SNOW/ICE, WATER
        - High quality (2): VEGETATION, NOT_VEGETATED

    Example:
        Load scene with local caching:

        ```python
        from pathlib import Path
        from darts_acquisition import load_cdse_s2_sr_scene
        from darts_utils.copernicus import init_copernicus

        # Setup authentication
        init_copernicus(profile_name="default")

        # Load with caching
        s2_ds = load_cdse_s2_sr_scene(
            s2item="S2A_MSIL2A_20230615T123456_N0509_R012_T33UUP_20230615T145678",
            bands_mapping="all",
            store=Path("/data/s2_store"),
            offline=False  # Download if not cached
        )

        # Compute NDVI
        ndvi = (s2_ds.nir - s2_ds.red) / (s2_ds.nir + s2_ds.red)

        # Filter to high quality pixels
        s2_filtered = s2_ds.where(s2_ds.quality_data_mask == 2)
        ```

    """
    s2id = s2item.id if isinstance(s2item, Item) else s2item

    bands_mapping = _get_band_mapping(bands_mapping)
    store_manager = CDSEStoreManager(
        store=store,
        bands_mapping=bands_mapping,
        aws_profile_name=aws_profile_name,
    )

    with stopwatch("Load Sentinel-2 scene from store", printer=logger.debug):
        if not offline:
            ds_s2 = store_manager.load(s2item)
        else:
            assert store is not None, "Store must be provided in offline mode!"
            ds_s2 = store_manager.open(s2item)

    if output_dir_for_debug_geotiff is not None:
        save_debug_geotiff(
            dataset=ds_s2,
            output_path=output_dir_for_debug_geotiff,
            optical_bands=[band for band in bands_mapping.keys() if band.startswith("B")],
            mask_bands=["SCL_20m"] if "SCL_20m" in bands_mapping.keys() else None,
        )

    # ? The following part takes ~2.5s on CPU and ~0.1 on GPU,
    # however moving to GPU and back takes ~2.2s
    ds_s2 = move_to_device(ds_s2, device)
    ds_s2 = ds_s2.rename_vars(bands_mapping)
    optical_bands = [band for name, band in bands_mapping.items() if name.startswith("B")]
    for band in optical_bands:
        # Set values where SCL_20m == 0 to NaN in all other bands
        # This way the data is similar to data from gee or planet data
        # We need to filter out 0 values, since they are not valid reflectance values
        # But also not reflected in the SCL for some reason
        ds_s2[band] = ds_s2[band].where(ds_s2.s2_scl != 0).where(ds_s2[band].astype("float32") != 0) / 10000.0 - 0.1
        ds_s2[band].attrs["long_name"] = f"Sentinel-2 {band.capitalize()}"
        ds_s2[band].attrs["units"] = "Reflectance"
    ds_s2["s2_scl"].attrs = {
        "long_name": "Sentinel-2 Scene Classification Layer",
        "description": (
            "0: NO_DATA - 1: SATURATED_OR_DEFECTIVE - 2: CAST_SHADOWS - 3: CLOUD_SHADOWS - 4: VEGETATION"
            " - 5: NOT_VEGETATED - 6: WATER - 7: UNCLASSIFIED - 8: CLOUD_MEDIUM_PROBABILITY"
            " - 9: CLOUD_HIGH_PROBABILITY - 10: THIN_CIRRUS - 11: SNOW or ICE"
        ),
    }
    for band in ds_s2.data_vars:
        ds_s2[band].attrs["data_source"] = "Sentinel-2 L2A via Copernicus STAC API (sentinel-2-l2a)"

    # ? This takes approx. 1.5s on CPU
    # For some reason, this takes ~1.2s on the GPU
    ds_s2 = convert_masks(ds_s2)

    ds_s2 = move_to_host(ds_s2)

    # Convert sun elevation and azimuth to match our naming
    ds_s2.attrs["azimuth"] = ds_s2.attrs.get("view:azimuth", float("nan"))
    ds_s2.attrs["elevation"] = ds_s2.attrs.get("view:sun_elevation", float("nan"))

    ds_s2.attrs["s2_tile_id"] = s2id
    ds_s2.attrs["tile_id"] = s2id

    return ds_s2

load_gee_s2_sr_scene

load_gee_s2_sr_scene(
    s2item: str | ee.Image,
    bands_mapping: dict | typing.Literal["all"] = {
        "B2": "blue",
        "B3": "green",
        "B4": "red",
        "B8": "nir",
    },
    store: pathlib.Path | None = None,
    offline: bool = False,
    output_dir_for_debug_geotiff: pathlib.Path
    | None = None,
    device: typing.Literal["cuda", "cpu"]
    | int = darts_utils.cuda.DEFAULT_DEVICE,
) -> xarray.Dataset

Load a Sentinel-2 scene from Google Earth Engine, downloading if necessary.

This function loads Sentinel-2 Level-2A surface reflectance data from Google Earth Engine. If a local store is provided, the data is cached for efficient repeated access. The function handles quality masking, reflectance scaling with time-dependent offsets, and optional GPU acceleration. It also handles NaN values in the data by masking them as invalid.

The download logic is basically as follows:

IF flag:raw-data-store THEN
    IF exist_local THEN
        open -> memory
    ELIF online THEN
        download -> memory
        save
    ELIF offline THEN
        RAISE ERROR
    ENDIF
ELIF online THEN
    download -> memory
ELIF offline THEN
    RAISE ERROR
ENDIF

Parameters:

  • s2item (str | ee.Image) –

    Sentinel-2 scene identifier or ee.Image object from COPERNICUS/S2_SR.

  • bands_mapping (dict | typing.Literal['all'], default: {'B2': 'blue', 'B3': 'green', 'B4': 'red', 'B8': 'nir'} ) –

    Mapping of Sentinel-2 band names to custom band names. Keys should be GEE band names (e.g., "B2", "B3"), values are output names. Use "all" to load all optical bands and SCL. Defaults to {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"}.

  • store (pathlib.Path | None, default: None ) –

    Path to local zarr store for caching. If None, data is loaded directly without caching. Defaults to None.

  • offline (bool, default: False ) –

    If True, only loads from local store without downloading. Requires store to be provided. If False, missing data is downloaded. Defaults to False.

  • output_dir_for_debug_geotiff (pathlib.Path | None, default: None ) –

    If provided, writes raw data as GeoTIFF files for debugging. Defaults to None.

  • device (typing.Literal['cuda', 'cpu'] | int, default: darts_utils.cuda.DEFAULT_DEVICE ) –

    Device for processing (GPU or CPU). Defaults to DEFAULT_DEVICE.

Returns:

  • xarray.Dataset

    xr.Dataset: Sentinel-2 dataset with the following data variables based on bands_mapping: - Optical bands (float32): Surface reflectance values [~-0.1 to ~1.0 for newer scenes, ~0.0 to ~1.0 for scenes before 2022-01-25] Default bands: blue, green, red, nir Additional bands available: coastal, rededge071, rededge075, rededge078, nir08, nir09, swir16, swir22 Each has attributes: - long_name: "Sentinel 2 {Band}" - units: "Reflectance" - data_source: "Sentinel-2 L2A via Google Earth Engine (COPERNICUS/S2_SR)" - s2_scl (uint8): Scene Classification Layer Attributes: long_name, description of class values (0=NO_DATA, 1=SATURATED, etc.) - quality_data_mask (uint8): Derived quality mask - 0 = Invalid (no data, saturated, defective, or NaN values) - 1 = Low quality (shadows, clouds, cirrus, snow/ice, water) - 2 = High quality (clear vegetation or non-vegetated land) - valid_data_mask (uint8): Binary validity mask (1=valid, 0=invalid)

    Dataset attributes: - azimuth (float): Solar azimuth angle from MEAN_SOLAR_AZIMUTH_ANGLE - elevation (float): Solar elevation angle from MEAN_SOLAR_ZENITH_ANGLE - s2_tile_id (str): Full PRODUCT_ID from GEE - tile_id (str): Scene identifier - time (str): Acquisition timestamp

Note

The offline parameter controls data fetching: - When offline=False: Automatically downloads missing data from GEE and stores it in the local zarr store (if store is provided). - When offline=True: Only reads from the local store. Raises an error if data is missing or if store is None.

Reflectance processing: - For scenes >= 2022-01-25: (DN / 10000.0) - 0.1 (processing baseline 04.00+) - For scenes < 2022-01-25: DN / 10000.0 (older processing baseline) - NaN values are filled with 0 and marked as invalid in quality_data_mask - Pixels where SCL is NaN are also masked as invalid

This function handles spatially random NaN values that can occur in GEE data by marking them as invalid and filling with 0 to prevent propagation in calculations.

Quality mask derivation from SCL: - Invalid (0): NO_DATA, SATURATED_OR_DEFECTIVE, or NaN values - Low quality (1): CAST_SHADOWS, CLOUD_SHADOWS, CLOUD_*, THIN_CIRRUS, SNOW/ICE, WATER - High quality (2): VEGETATION, NOT_VEGETATED

Example

Load scene with local caching:

import ee
from pathlib import Path
from darts_acquisition import load_gee_s2_sr_scene

# Initialize Earth Engine
ee.Initialize()

# Load with caching
s2_ds = load_gee_s2_sr_scene(
    s2item="20230615T123456_20230615T123659_T33UUP",
    bands_mapping="all",
    store=Path("/data/s2_store"),
    offline=False  # Download if not cached
)

# Compute NDVI
ndvi = (s2_ds.nir - s2_ds.red) / (s2_ds.nir + s2_ds.red)

# Filter to high quality pixels
s2_filtered = s2_ds.where(s2_ds.quality_data_mask == 2)
Source code in darts-acquisition/src/darts_acquisition/s2/gee_scene.py
@stopwatch.f("Loading Sentinel-2 scene from GEE", printer=logger.debug, print_kwargs=["s2item"])
def load_gee_s2_sr_scene(
    s2item: str | ee.Image,
    bands_mapping: dict | Literal["all"] = {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"},
    store: Path | None = None,
    offline: bool = False,
    output_dir_for_debug_geotiff: Path | None = None,
    device: Literal["cuda", "cpu"] | int = DEFAULT_DEVICE,
) -> xr.Dataset:
    """Load a Sentinel-2 scene from Google Earth Engine, downloading if necessary.

    This function loads Sentinel-2 Level-2A surface reflectance data from Google Earth Engine.
    If a local store is provided, the data is cached for efficient repeated access. The function
    handles quality masking, reflectance scaling with time-dependent offsets, and optional GPU
    acceleration. It also handles NaN values in the data by masking them as invalid.

    The download logic is basically as follows:

    ```
    IF flag:raw-data-store THEN
        IF exist_local THEN
            open -> memory
        ELIF online THEN
            download -> memory
            save
        ELIF offline THEN
            RAISE ERROR
        ENDIF
    ELIF online THEN
        download -> memory
    ELIF offline THEN
        RAISE ERROR
    ENDIF
    ```

    Args:
        s2item (str | ee.Image): Sentinel-2 scene identifier or ee.Image object from COPERNICUS/S2_SR.
        bands_mapping (dict | Literal["all"], optional): Mapping of Sentinel-2 band names to
            custom band names. Keys should be GEE band names (e.g., "B2", "B3"), values are
            output names. Use "all" to load all optical bands and SCL.
            Defaults to {"B2": "blue", "B3": "green", "B4": "red", "B8": "nir"}.
        store (Path | None, optional): Path to local zarr store for caching. If None, data is
            loaded directly without caching. Defaults to None.
        offline (bool, optional): If True, only loads from local store without downloading.
            Requires `store` to be provided. If False, missing data is downloaded.
            Defaults to False.
        output_dir_for_debug_geotiff (Path | None, optional): If provided, writes raw data as
            GeoTIFF files for debugging. Defaults to None.
        device (Literal["cuda", "cpu"] | int, optional): Device for processing (GPU or CPU).
            Defaults to DEFAULT_DEVICE.

    Returns:
        xr.Dataset: Sentinel-2 dataset with the following data variables based on bands_mapping:
            - Optical bands (float32): Surface reflectance values [~-0.1 to ~1.0 for newer scenes,
              ~0.0 to ~1.0 for scenes before 2022-01-25]
              Default bands: blue, green, red, nir
              Additional bands available: coastal, rededge071, rededge075, rededge078,
              nir08, nir09, swir16, swir22
              Each has attributes:
              - long_name: "Sentinel 2 {Band}"
              - units: "Reflectance"
              - data_source: "Sentinel-2 L2A via Google Earth Engine (COPERNICUS/S2_SR)"
            - s2_scl (uint8): Scene Classification Layer
              Attributes: long_name, description of class values (0=NO_DATA, 1=SATURATED, etc.)
            - quality_data_mask (uint8): Derived quality mask
              - 0 = Invalid (no data, saturated, defective, or NaN values)
              - 1 = Low quality (shadows, clouds, cirrus, snow/ice, water)
              - 2 = High quality (clear vegetation or non-vegetated land)
            - valid_data_mask (uint8): Binary validity mask (1=valid, 0=invalid)

            Dataset attributes:
            - azimuth (float): Solar azimuth angle from MEAN_SOLAR_AZIMUTH_ANGLE
            - elevation (float): Solar elevation angle from MEAN_SOLAR_ZENITH_ANGLE
            - s2_tile_id (str): Full PRODUCT_ID from GEE
            - tile_id (str): Scene identifier
            - time (str): Acquisition timestamp

    Note:
        The `offline` parameter controls data fetching:
        - When `offline=False`: Automatically downloads missing data from GEE and stores it
          in the local zarr store (if store is provided).
        - When `offline=True`: Only reads from the local store. Raises an error if data is
          missing or if store is None.

        Reflectance processing:
        - For scenes >= 2022-01-25: (DN / 10000.0) - 0.1 (processing baseline 04.00+)
        - For scenes < 2022-01-25: DN / 10000.0 (older processing baseline)
        - NaN values are filled with 0 and marked as invalid in quality_data_mask
        - Pixels where SCL is NaN are also masked as invalid

        This function handles spatially random NaN values that can occur in GEE data by
        marking them as invalid and filling with 0 to prevent propagation in calculations.

        Quality mask derivation from SCL:
        - Invalid (0): NO_DATA, SATURATED_OR_DEFECTIVE, or NaN values
        - Low quality (1): CAST_SHADOWS, CLOUD_SHADOWS, CLOUD_*, THIN_CIRRUS, SNOW/ICE, WATER
        - High quality (2): VEGETATION, NOT_VEGETATED

    Example:
        Load scene with local caching:

        ```python
        import ee
        from pathlib import Path
        from darts_acquisition import load_gee_s2_sr_scene

        # Initialize Earth Engine
        ee.Initialize()

        # Load with caching
        s2_ds = load_gee_s2_sr_scene(
            s2item="20230615T123456_20230615T123659_T33UUP",
            bands_mapping="all",
            store=Path("/data/s2_store"),
            offline=False  # Download if not cached
        )

        # Compute NDVI
        ndvi = (s2_ds.nir - s2_ds.red) / (s2_ds.nir + s2_ds.red)

        # Filter to high quality pixels
        s2_filtered = s2_ds.where(s2_ds.quality_data_mask == 2)
        ```

    """
    if isinstance(s2item, str):
        s2id = s2item
        s2item = ee.Image(f"COPERNICUS/S2_SR/{s2id}")
    else:
        s2id = s2item.id().getInfo().split("/")[-1]
    logger.debug(f"Loading Sentinel-2 tile {s2id=} from GEE")

    bands_mapping = _get_band_mapping(bands_mapping)
    store_manager = GEEStoreManager(
        store=store,
        bands_mapping=bands_mapping,
    )

    if not offline:
        ds_s2 = store_manager.load(s2item)
    else:
        assert store is not None, "Store must be provided in offline mode!"
        ds_s2 = store_manager.open(s2item)

    if output_dir_for_debug_geotiff is not None:
        save_debug_geotiff(
            dataset=ds_s2,
            output_path=output_dir_for_debug_geotiff,
            optical_bands=[band for band in bands_mapping.keys() if band.startswith("B")],
            mask_bands=["SCL"],
        )

    ds_s2 = ds_s2.rename_vars(bands_mapping)

    optical_bands = [band for name, band in bands_mapping.items() if name.startswith("B")]

    # Fix new preprocessing offset -> See docs about bands
    dt = datetime.strptime(ds_s2.attrs["time"], "%Y-%m-%dT%H:%M:%S.%f000")
    offset = 0.1 if dt >= datetime(2022, 1, 25) else 0.0

    ds_s2 = move_to_device(ds_s2, device)
    for band in optical_bands:
        # Apply scale and offset
        ds_s2[band] = ds_s2[band].astype("float32") / 10000.0 - offset
        ds_s2[band].attrs["long_name"] = f"Sentinel 2 {band.capitalize()}"
        ds_s2[band].attrs["units"] = "Reflectance"
    ds_s2["s2_scl"].attrs = {
        "long_name": "Sentinel-2 Scene Classification Layer",
        "description": (
            "0: NO_DATA - 1: SATURATED_OR_DEFECTIVE - 2: CAST_SHADOWS - 3: CLOUD_SHADOWS - 4: VEGETATION"
            " - 5: NOT_VEGETATED - 6: WATER - 7: UNCLASSIFIED - 8: CLOUD_MEDIUM_PROBABILITY - 9: CLOUD_HIGH_PROBABILITY"
            " - 10: THIN_CIRRUS - 11: SNOW or ICE"
        ),
    }
    for band in ds_s2.data_vars:
        ds_s2[band].attrs["data_source"] = "Sentinel-2 L2A via Google Earth Engine (COPERNICUS/S2_SR)"

    ds_s2 = convert_masks(ds_s2)
    qdm_attrs = ds_s2["quality_data_mask"].attrs.copy()

    # For some reason, there are some spatially random nan values in the data, not only at the borders
    # To workaround this, set all nan values to 0 and add this information to the quality_data_mask
    # This workaround is quite computational expensive, but it works for now
    # TODO: Find other solutions for this problem!
    with stopwatch(f"Fixing nan values in {s2id=}", printer=logger.debug):
        for band in optical_bands:
            ds_s2["quality_data_mask"] = xr.where(ds_s2[band].isnull(), 0, ds_s2["quality_data_mask"])
            ds_s2[band] = ds_s2[band].fillna(0)
            # Turn real nan values (s2_scl is nan) into invalid data
            ds_s2[band] = ds_s2[band].where(~ds_s2["s2_scl"].isnull())
    ds_s2 = move_to_host(ds_s2)

    ds_s2["quality_data_mask"].attrs = qdm_attrs
    ds_s2.attrs["s2_tile_id"] = s2item.getInfo()["properties"]["PRODUCT_ID"]
    ds_s2.attrs["tile_id"] = s2id

    return ds_s2

load_planet_masks

load_planet_masks(
    fpath: str | pathlib.Path,
) -> xarray.Dataset

Load quality and validity masks from a PlanetScope scene's UDM-2 data.

This function extracts data quality information from the PlanetScope Usable Data Mask (UDM-2) to create simplified quality masks for filtering and analysis.

Parameters:

  • fpath (str | pathlib.Path) –

    Path to the directory containing the PlanetScope scene data. Must contain _udm2.tif (or _udm2_clip.tif) file.

Returns:

  • xarray.Dataset

    xr.Dataset: Dataset containing quality mask information with the following data variables: - quality_data_mask (uint8): Combined quality indicator * 0 = Invalid (no data) * 1 = Low quality (clouds, shadows, haze, snow, or other artifacts) * 2 = High quality (clear, usable data) Attributes: data_source="planet", long_name="Quality data mask", description="0 = Invalid, 1 = Low Quality, 2 = High Quality" - planet_udm (uint8): Raw UDM-2 bands (8 bands) Attributes: data_source="planet", long_name="Planet UDM", description="Usable Data Mask"

Raises:

Note

Quality mask derivation logic: - Invalid: UDM band 8 (no data) is set - Low quality: Any of UDM bands 2-6 (clouds, shadows, haze, snow, or artifacts) is set - High quality: Neither invalid nor low quality

UDM-2 band definitions: 1. Clear - 2. Snow - 3. Shadow - 4. Light Haze - 5. Heavy Haze 6. Cloud - 7. Confidence - 8. No Data

Example

Load and apply quality masks:

from darts_acquisition import load_planet_scene, load_planet_masks

# Load scene and masks
scene = load_planet_scene("/data/planet/20230615_123045_1234")
masks = load_planet_masks("/data/planet/20230615_123045_1234")

# Filter to high quality pixels only
scene_filtered = scene.where(masks.quality_data_mask == 2)

# Count quality distribution
import numpy as np
unique, counts = np.unique(
    masks.quality_data_mask.values,
    return_counts=True
)
print(dict(zip(unique, counts)))
Source code in darts-acquisition/src/darts_acquisition/planet.py
@stopwatch.f("Loading Planet masks", printer=logger.debug)
def load_planet_masks(fpath: str | Path) -> xr.Dataset:
    """Load quality and validity masks from a PlanetScope scene's UDM-2 data.

    This function extracts data quality information from the PlanetScope Usable Data Mask
    (UDM-2) to create simplified quality masks for filtering and analysis.

    Args:
        fpath (str | Path): Path to the directory containing the PlanetScope scene data.
            Must contain *_udm2.tif (or *_udm2_clip.tif) file.

    Returns:
        xr.Dataset: Dataset containing quality mask information with the following data variables:
            - quality_data_mask (uint8): Combined quality indicator
                * 0 = Invalid (no data)
                * 1 = Low quality (clouds, shadows, haze, snow, or other artifacts)
                * 2 = High quality (clear, usable data)
              Attributes: data_source="planet", long_name="Quality data mask",
              description="0 = Invalid, 1 = Low Quality, 2 = High Quality"
            - planet_udm (uint8): Raw UDM-2 bands (8 bands)
              Attributes: data_source="planet", long_name="Planet UDM",
              description="Usable Data Mask"

    Raises:
        FileNotFoundError: If the UDM-2 TIFF file is not found in the directory.

    Note:
        Quality mask derivation logic:
        - Invalid: UDM band 8 (no data) is set
        - Low quality: Any of UDM bands 2-6 (clouds, shadows, haze, snow, or artifacts) is set
        - High quality: Neither invalid nor low quality

        UDM-2 band definitions:
        1. Clear - 2. Snow - 3. Shadow - 4. Light Haze - 5. Heavy Haze
        6. Cloud - 7. Confidence - 8. No Data

    Example:
        Load and apply quality masks:

        ```python
        from darts_acquisition import load_planet_scene, load_planet_masks

        # Load scene and masks
        scene = load_planet_scene("/data/planet/20230615_123045_1234")
        masks = load_planet_masks("/data/planet/20230615_123045_1234")

        # Filter to high quality pixels only
        scene_filtered = scene.where(masks.quality_data_mask == 2)

        # Count quality distribution
        import numpy as np
        unique, counts = np.unique(
            masks.quality_data_mask.values,
            return_counts=True
        )
        print(dict(zip(unique, counts)))
        ```

    """
    # Convert to Path object if a string is provided
    fpath = fpath if isinstance(fpath, Path) else Path(fpath)

    logger.debug(f"Loading data masks from {fpath.resolve()}")

    # Get imagepath
    udm_path = next(fpath.glob("*_udm2.tif"), None)
    if not udm_path:
        udm_path = next(fpath.glob("*_udm2_clip.tif"), None)
    if not udm_path:
        raise FileNotFoundError(f"No matching UDM-2 TIFF files found in {fpath.resolve()} (.glob('*_udm2.tif'))")

    # See udm classes here: https://developers.planet.com/docs/data/udm-2/
    da_udm = xr.open_dataarray(udm_path).astype("uint8")
    invalids = da_udm.sel(band=8).fillna(0) != 0
    low_quality = da_udm.sel(band=[2, 3, 4, 5, 6]).max(axis=0) == 1
    high_quality = ~low_quality & ~invalids
    qa_ds = (
        xr.where(high_quality, 2, 0)
        .where(~low_quality, 1)
        .where(~invalids, 0)
        .astype("uint8")
        .to_dataset(name="quality_data_mask")
        .drop_vars("band")
    )
    qa_ds["planet_udm"] = da_udm

    qa_ds["quality_data_mask"].attrs = {
        "data_source": "planet",
        "long_name": "Quality data mask",
        "description": "0 = Invalid, 1 = Low Quality, 2 = High Quality",
    }
    qa_ds["planet_udm"].attrs = {
        "data_source": "planet",
        "long_name": "Planet UDM",
        "description": "Usable Data Mask",
    }

    return qa_ds

load_planet_scene

load_planet_scene(
    fpath: str | pathlib.Path,
) -> xarray.Dataset

Load a PlanetScope satellite scene from GeoTIFF files.

This function loads PlanetScope surface reflectance data (PSScene or PSOrthoTile) from a directory containing TIFF files and metadata. The scene type is automatically detected from the directory name format.

Parameters:

  • fpath (str | pathlib.Path) –

    Path to the directory containing the PlanetScope scene data. The directory must follow PlanetScope naming conventions: - Scene: YYYYMMDD_HHMMSS_NN_XXXX or YYYYMMDD_HHMMSS_XXXX - Orthotile: NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX Must contain _SR.tif (or _SR_clip.tif) and *_metadata.json files.

Returns:

  • xarray.Dataset

    xr.Dataset: The loaded PlanetScope dataset with the following data variables: - blue (float32): Blue band surface reflectance [0-1] - green (float32): Green band surface reflectance [0-1] - red (float32): Red band surface reflectance [0-1] - nir (float32): Near-infrared band surface reflectance [0-1]

    Each variable has attributes: - long_name: "PLANET {Band}" - units: "Reflectance" - data_source: "planet" - planet_type: "scene" or "orthotile"

    Dataset-level attributes: - azimuth (float): Solar azimuth angle in degrees - elevation (float): Solar elevation angle in degrees - tile_id (str): Unique identifier for the scene - planet_scene_id (str): Scene identifier (for scenes) or scene portion (for orthotiles) - planet_orthotile_id (str): Orthotile identifier (only for orthotiles)

Raises:

  • FileNotFoundError

    If required TIFF or metadata files are not found in the directory.

Note
  • Input DN values are divided by 10000 to convert to reflectance [0-1].
  • The scene type (PSScene vs PSOrthoTile) is automatically detected from the directory name.
  • Solar geometry is extracted from the metadata JSON file.
Example

Load a PlanetScope scene:

from darts_acquisition import load_planet_scene

# Load scene data
planet_ds = load_planet_scene("/data/planet/20230615_123045_1234")

# Access bands
ndvi = (planet_ds.nir - planet_ds.red) / (planet_ds.nir + planet_ds.red)

# Check solar geometry
print(f"Solar azimuth: {planet_ds.azimuth}")
print(f"Solar elevation: {planet_ds.elevation}")
Source code in darts-acquisition/src/darts_acquisition/planet.py
@stopwatch.f("Loading Planet scene", printer=logger.debug)
def load_planet_scene(fpath: str | Path) -> xr.Dataset:
    """Load a PlanetScope satellite scene from GeoTIFF files.

    This function loads PlanetScope surface reflectance data (PSScene or PSOrthoTile) from
    a directory containing TIFF files and metadata. The scene type is automatically detected
    from the directory name format.

    Args:
        fpath (str | Path): Path to the directory containing the PlanetScope scene data.
            The directory must follow PlanetScope naming conventions:
            - Scene: YYYYMMDD_HHMMSS_NN_XXXX or YYYYMMDD_HHMMSS_XXXX
            - Orthotile: NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX
            Must contain *_SR.tif (or *_SR_clip.tif) and *_metadata.json files.

    Returns:
        xr.Dataset: The loaded PlanetScope dataset with the following data variables:
            - blue (float32): Blue band surface reflectance [0-1]
            - green (float32): Green band surface reflectance [0-1]
            - red (float32): Red band surface reflectance [0-1]
            - nir (float32): Near-infrared band surface reflectance [0-1]

            Each variable has attributes:
            - long_name: "PLANET {Band}"
            - units: "Reflectance"
            - data_source: "planet"
            - planet_type: "scene" or "orthotile"

            Dataset-level attributes:
            - azimuth (float): Solar azimuth angle in degrees
            - elevation (float): Solar elevation angle in degrees
            - tile_id (str): Unique identifier for the scene
            - planet_scene_id (str): Scene identifier (for scenes) or scene portion (for orthotiles)
            - planet_orthotile_id (str): Orthotile identifier (only for orthotiles)

    Raises:
        FileNotFoundError: If required TIFF or metadata files are not found in the directory.

    Note:
        - Input DN values are divided by 10000 to convert to reflectance [0-1].
        - The scene type (PSScene vs PSOrthoTile) is automatically detected from the directory name.
        - Solar geometry is extracted from the metadata JSON file.

    Example:
        Load a PlanetScope scene:

        ```python
        from darts_acquisition import load_planet_scene

        # Load scene data
        planet_ds = load_planet_scene("/data/planet/20230615_123045_1234")

        # Access bands
        ndvi = (planet_ds.nir - planet_ds.red) / (planet_ds.nir + planet_ds.red)

        # Check solar geometry
        print(f"Solar azimuth: {planet_ds.azimuth}")
        print(f"Solar elevation: {planet_ds.elevation}")
        ```

    """
    # Convert to Path object if a string is provided
    fpath = fpath if isinstance(fpath, Path) else Path(fpath)

    # Check if the directory contains a PSOrthoTile or PSScene
    planet_type = parse_planet_type(fpath)
    logger.debug(f"Loading Planet PS {planet_type.capitalize()} from {fpath.resolve()}")

    # Get imagepath
    ps_image = next(fpath.glob("*_SR.tif"), None)
    if not ps_image:
        ps_image = next(fpath.glob("*_SR_clip.tif"), None)
    if not ps_image:
        raise FileNotFoundError(f"No matching TIFF files found in {fpath.resolve()} (.glob('*_SR.tif'))")

    ps_meta = next(fpath.glob("*_metadata.json"), None)
    if not ps_meta:
        raise FileNotFoundError(
            f"No matching metadata JSON files found in {fpath.resolve()} (.glob('*_metadata.json'))"
        )
    metadata = json.load(ps_meta.open())

    # Define band names and corresponding indices
    planet_da = xr.open_dataarray(ps_image)

    # Divide by 10000 to get reflectance between 0 and 1
    planet_da = planet_da.astype("float32") / 10000.0

    # Create a dataset with the bands
    bands = ["blue", "green", "red", "nir"]
    ds_planet = planet_da.assign_coords({"band": bands}).to_dataset(dim="band")
    for var in bands:
        ds_planet[var].attrs["long_name"] = f"PLANET {var.capitalize()}"
        ds_planet[var].attrs["units"] = "Reflectance"

    for var in ds_planet.data_vars:
        ds_planet[var].attrs["data_source"] = "planet"
        ds_planet[var].attrs["planet_type"] = planet_type

    # Add sun and elevation from metadata
    ds_planet.attrs["azimuth"] = metadata.get("sun_azimuth", float("nan"))
    ds_planet.attrs["elevation"] = metadata.get("sun_elevation", float("nan"))

    if planet_type == "scene":
        ds_planet.attrs["tile_id"] = fpath.stem
        ds_planet.attrs["planet_scene_id"] = fpath.stem
    elif planet_type == "orthotile":
        ds_planet.attrs["tile_id"] = f"{fpath.parent.stem}-{fpath.stem}"
        ds_planet.attrs["planet_orthotile_id"] = fpath.parent.stem
        ds_planet.attrs["planet_scene_id"] = fpath.stem

    return ds_planet

load_tcvis

load_tcvis(
    geobox: odc.geo.geobox.GeoBox,
    data_dir: pathlib.Path | str,
    buffer: int = 0,
    offline: bool = False,
) -> xarray.Dataset

Load TCVIS (Tasseled Cap trends) for the given geobox, fetch new data from GEE if necessary.

This function loads Tasseled Cap trend data from a local icechunk store. If offline=False, missing data will be automatically downloaded from Google Earth Engine and stored locally. The data contains temporal trends in brightness, greenness, and wetness derived from Landsat imagery.

Parameters:

  • geobox (odc.geo.geobox.GeoBox) –

    The geobox for which to load the data. Can be in any CRS.

  • data_dir (pathlib.Path | str) –

    Path to the icechunk data directory (must have .icechunk suffix). This directory stores downloaded TCVIS data for faster consecutive access.

  • buffer (int, default: 0 ) –

    Buffer around the geobox in pixels. The buffer is applied in the TCVIS dataset's native CRS after reprojecting the input geobox. Defaults to 0.

  • offline (bool, default: False ) –

    If True, only loads data already present in the local store without attempting any downloads. If False, missing data is downloaded from GEE. Defaults to False.

Returns:

  • xarray.Dataset

    xr.Dataset: The TCVIS dataset with the following data variables: - tc_brightness (float): Temporal trend in Tasseled Cap brightness component - tc_greenness (float): Temporal trend in Tasseled Cap greenness component - tc_wetness (float): Temporal trend in Tasseled Cap wetness component

    The dataset is in the TCVIS native CRS with the buffer applied. It is NOT automatically reprojected to match the input geobox's CRS.

Note

The offline parameter controls data fetching behavior:

  • When offline=False: Uses smart_geocubes accessor's load() method which automatically downloads missing tiles from GEE and persists them to the icechunk store.
  • When offline=True: Uses the accessor's open_xarray() method to open the existing store and crops it to the requested region. Raises an error if data is missing.

Variable naming: The original TCB_slope, TCG_slope, and TCW_slope variables are renamed to follow DARTS conventions (tc_brightness, tc_greenness, tc_wetness).

Example

Load TCVIS data aligned with optical imagery:

from darts_acquisition import load_tcvis

# Assume "optical" is a loaded Sentinel-2 dataset
tcvis = load_tcvis(
    geobox=optical.odc.geobox,
    data_dir="/data/tcvis.icechunk",
    buffer=0,
    offline=False
)

# Reproject to match optical data's CRS and resolution
tcvis = tcvis.odc.reproject(optical.odc.geobox, resampling="cubic")
Source code in darts-acquisition/src/darts_acquisition/tcvis.py
@stopwatch.f("Loading TCVIS", printer=logger.debug, print_kwargs=["data_dir", "buffer", "offline"])
def load_tcvis(
    geobox: GeoBox,
    data_dir: Path | str,
    buffer: int = 0,
    offline: bool = False,
) -> xr.Dataset:
    """Load TCVIS (Tasseled Cap trends) for the given geobox, fetch new data from GEE if necessary.

    This function loads Tasseled Cap trend data from a local icechunk store. If `offline=False`,
    missing data will be automatically downloaded from Google Earth Engine and stored locally.
    The data contains temporal trends in brightness, greenness, and wetness derived from
    Landsat imagery.

    Args:
        geobox (GeoBox): The geobox for which to load the data. Can be in any CRS.
        data_dir (Path | str): Path to the icechunk data directory (must have .icechunk suffix).
            This directory stores downloaded TCVIS data for faster consecutive access.
        buffer (int, optional): Buffer around the geobox in pixels. The buffer is applied in the
            TCVIS dataset's native CRS after reprojecting the input geobox. Defaults to 0.
        offline (bool, optional): If True, only loads data already present in the local store
            without attempting any downloads. If False, missing data is downloaded from GEE.
            Defaults to False.

    Returns:
        xr.Dataset: The TCVIS dataset with the following data variables:
            - tc_brightness (float): Temporal trend in Tasseled Cap brightness component
            - tc_greenness (float): Temporal trend in Tasseled Cap greenness component
            - tc_wetness (float): Temporal trend in Tasseled Cap wetness component

            The dataset is in the TCVIS native CRS with the buffer applied.
            It is NOT automatically reprojected to match the input geobox's CRS.

    Note:
        The `offline` parameter controls data fetching behavior:

        - When `offline=False`: Uses `smart_geocubes` accessor's `load()` method which automatically
          downloads missing tiles from GEE and persists them to the icechunk store.
        - When `offline=True`: Uses the accessor's `open_xarray()` method to open the existing store
          and crops it to the requested region. Raises an error if data is missing.

        Variable naming: The original TCB_slope, TCG_slope, and TCW_slope variables are renamed
        to follow DARTS conventions (tc_brightness, tc_greenness, tc_wetness).

    Example:
        Load TCVIS data aligned with optical imagery:

        ```python
        from darts_acquisition import load_tcvis

        # Assume "optical" is a loaded Sentinel-2 dataset
        tcvis = load_tcvis(
            geobox=optical.odc.geobox,
            data_dir="/data/tcvis.icechunk",
            buffer=0,
            offline=False
        )

        # Reproject to match optical data's CRS and resolution
        tcvis = tcvis.odc.reproject(optical.odc.geobox, resampling="cubic")
        ```

    """
    assert ".icechunk" == data_dir.suffix, f"Data directory {data_dir} must have an .icechunk suffix!"
    accessor = smart_geocubes.TCTrend(data_dir, create_icechunk_storage=False)

    # We want to assume that the datacube is already created to be save in a multi-process environment
    accessor.assert_created()

    if not offline:
        tcvis = accessor.load(geobox, buffer=buffer, persist=True)
    else:
        xrcube = accessor.open_xarray()
        reference_geobox = geobox.to_crs(accessor.extent.crs, resolution=accessor.extent.resolution.x).pad(buffer)
        tcvis = xrcube.odc.crop(reference_geobox.extent, apply_mask=False)
        tcvis = tcvis.load()

    # Rename to follow our conventions
    tcvis = tcvis.rename_vars(
        {
            "TCB_slope": "tc_brightness",
            "TCG_slope": "tc_greenness",
            "TCW_slope": "tc_wetness",
        }
    )

    return tcvis

match_cdse_s2_sr_scene_ids_from_geodataframe

match_cdse_s2_sr_scene_ids_from_geodataframe(
    aoi: geopandas.GeoDataFrame,
    day_range: int = 60,
    max_cloud_cover: int = 20,
    min_intersects: float = 0.7,
    simplify_geometry: float
    | typing.Literal[False] = False,
    save_scores: pathlib.Path | None = None,
) -> dict[int, pystac.Item | None]

Match items from a GeoDataFrame with Sentinel-2 items from the STAC API based on a date range.

Parameters:

  • aoi (geopandas.GeoDataFrame) –

    The area of interest as a GeoDataFrame.

  • day_range (int, default: 60 ) –

    The number of days before and after the date to search for. Defaults to 60.

  • max_cloud_cover (int, default: 20 ) –

    The maximum cloud cover percentage. Defaults to 20.

  • min_intersects (float, default: 0.7 ) –

    The minimum intersection area ratio to consider a match. Defaults to 0.7.

  • simplify_geometry (float | typing.Literal[False], default: False ) –

    If a float is provided, the geometry will be simplified using the simplify method of geopandas. If False, no simplification will be done. This may become useful for large / weird AOIs which are too large for the STAC API. Defaults to False.

  • save_scores (pathlib.Path | None, default: None ) –

    If provided, the scores will be saved to this path as a Parquet file.

Raises:

  • ValueError

    If the 'date' column is not present or not of type datetime.

Returns:

  • dict[int, pystac.Item | None]

    dict[int, Item | None]: A dictionary mapping each row to its best matching Sentinel-2 item. The keys are the indices of the rows in the GeoDataFrame, and the values are the matching Sentinel-2 items. If no matching item is found, the value will be None.

Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch("Matching Sentinel-2 scenes in CDSE from AOI", printer=logger.debug)
def match_cdse_s2_sr_scene_ids_from_geodataframe(
    aoi: gpd.GeoDataFrame,
    day_range: int = 60,
    max_cloud_cover: int = 20,
    min_intersects: float = 0.7,
    simplify_geometry: float | Literal[False] = False,
    save_scores: Path | None = None,
) -> dict[int, Item | None]:
    """Match items from a GeoDataFrame with Sentinel-2 items from the STAC API based on a date range.

    Args:
        aoi (gpd.GeoDataFrame): The area of interest as a GeoDataFrame.
        day_range (int): The number of days before and after the date to search for.
            Defaults to 60.
        max_cloud_cover (int, optional): The maximum cloud cover percentage. Defaults to 20.
        min_intersects (float, optional): The minimum intersection area ratio to consider a match. Defaults to 0.7.
        simplify_geometry (float | Literal[False], optional): If a float is provided, the geometry will be simplified
            using the `simplify` method of geopandas. If False, no simplification will be done.
            This may become useful for large / weird AOIs which are too large for the STAC API.
            Defaults to False.
        save_scores (Path | None, optional): If provided, the scores will be saved to this path as a Parquet file.

    Raises:
        ValueError: If the 'date' column is not present or not of type datetime.

    Returns:
        dict[int, Item | None]: A dictionary mapping each row to its best matching Sentinel-2 item.
            The keys are the indices of the rows in the GeoDataFrame, and the values are the matching Sentinel-2 items.
            If no matching item is found, the value will be None.

    """
    # Check weather the "date" column is present and of type datetime
    if "date" not in aoi.columns or not pd.api.types.is_datetime64_any_dtype(aoi["date"]):
        raise ValueError("The 'date' column must be present and of type datetime in the GeoDataFrame.")

    if simplify_geometry:
        aoi = aoi.copy()
        aoi["geometry"] = aoi.geometry.simplify(simplify_geometry)

    matches = {}
    scores = []
    for i, row in aoi.iterrows():
        intersects = row.geometry.__geo_interface__
        start_date = (row["date"] - pd.Timedelta(days=day_range)).strftime("%Y-%m-%d")
        end_date = (row["date"] + pd.Timedelta(days=day_range)).strftime("%Y-%m-%d")
        intersecting_items = search_cdse_s2_sr(
            intersects=intersects,
            start_date=start_date,
            end_date=end_date,
            max_cloud_cover=max_cloud_cover,
        )
        if not intersecting_items:
            logger.info(f"No Sentinel-2 items found for footprint #{i} in the date range {start_date} to {end_date}.")
            matches[i] = None
            continue
        intersecting_items_gdf = gpd.GeoDataFrame.from_features(
            [item.to_dict() for item in intersecting_items.values()],
            crs="EPSG:4326",
        )
        intersecting_items_gdf["footprint_index"] = i
        intersecting_items_gdf["s2id"] = list(intersecting_items.keys())
        # Some item geometries might be invalid (probably because of the arctic circle)
        # We will drop those items, since they cannot be used for intersection calculations
        intersecting_items_gdf = intersecting_items_gdf[intersecting_items_gdf.geometry.is_valid]
        if intersecting_items_gdf.empty:
            logger.info(
                f"No valid Sentinel-2 items found for footprint #{i} in the date range {start_date} to {end_date}."
            )
            matches[i] = None
            continue
        # Get to UTM zone for better area calculations
        utm_zone = intersecting_items_gdf.estimate_utm_crs()
        # Calculate intersection area ratio
        intersecting_items_gdf["intersection_area"] = (
            intersecting_items_gdf.intersection(row.geometry).to_crs(utm_zone).area
        )
        # We need a geodataframe containing only our wanted row, since to_crs is not available for a single row
        intersecting_items_gdf["aoi_area"] = aoi.loc[[i]].to_crs(utm_zone).iloc[0].geometry.area
        intersecting_items_gdf["intersection_ratio"] = (
            intersecting_items_gdf["intersection_area"] / intersecting_items_gdf["aoi_area"]
        )
        # Filter items based on the minimum intersection ratio
        max_intersection = intersecting_items_gdf["intersection_ratio"].max()
        intersecting_items_gdf = intersecting_items_gdf[intersecting_items_gdf["intersection_ratio"] >= min_intersects]
        if intersecting_items_gdf.empty:
            logger.info(
                f"No Sentinel-2 items found for {i} with sufficient intersection ratio "
                f"({min_intersects}, maximum was {max_intersection:.4f})"
                f" in the date range {start_date} to {end_date}."
            )
            matches[i] = None
            continue
        intersecting_items_gdf["datetime"] = pd.to_datetime(intersecting_items_gdf["datetime"])
        intersecting_items_gdf["time_diff"] = abs(intersecting_items_gdf["datetime"] - row["date"])
        intersecting_items_gdf["score_cloud"] = ((100.0 - intersecting_items_gdf["eo:cloud_cover"]) / 5) ** 2
        intersecting_items_gdf["score_fill"] = ((100.0 - intersecting_items_gdf["intersection_ratio"] * 100) / 5) ** 2
        intersecting_items_gdf["score_time_diff"] = (
            intersecting_items_gdf["time_diff"].dt.total_seconds() / (2 * 24 * 3600)
        ) ** 2

        intersecting_items_gdf["score"] = (
            intersecting_items_gdf["score_cloud"]
            + intersecting_items_gdf["score_fill"]
            + intersecting_items_gdf["score_time_diff"]
        )

        # Debug the scoring
        score_msg = f"Scores for {i}:\n"
        for j, match in intersecting_items_gdf.iterrows():
            score_msg += (
                f"\n- Match with {j}: "
                f"Cloud Cover={match['eo:cloud_cover']}, "
                f"Intersection Ratio={match['intersection_ratio']:.2f}, "
                f"Time Diff={match['time_diff']}, "
                f"Score Cloud={match['score_cloud']:.2f}, "
                f"Score Fill={match['score_fill']:.2f}, "
                f"Score Time Diff={match['score_time_diff']:.2f}, "
                f"-> Score={match['score']:.2f}"
            )
        logger.debug(score_msg)

        # Get the s2id with the lowest score
        best_item = intersecting_items_gdf.loc[intersecting_items_gdf["score"].idxmin()]
        matches[i] = intersecting_items[best_item["s2id"]]
        scores.append(intersecting_items_gdf)

    if save_scores:
        scores_df = gpd.GeoDataFrame(pd.concat(scores))
        scores_df.to_parquet(save_scores)

    return matches

parse_planet_type

parse_planet_type(
    fpath: pathlib.Path,
) -> typing.Literal["orthotile", "scene"]

Parse the type of Planet data from the directory path.

Parameters:

  • fpath (pathlib.Path) –

    The directory path to the Planet data.

Returns:

  • typing.Literal['orthotile', 'scene']

    Literal["orthotile", "scene"]: The type of Planet data.

Raises:

  • ValueError

    If the Planet data type cannot be parsed from the file path.

Source code in darts-acquisition/src/darts_acquisition/planet.py
def parse_planet_type(fpath: Path) -> Literal["orthotile", "scene"]:
    """Parse the type of Planet data from the directory path.

    Args:
        fpath (Path): The directory path to the Planet data.

    Returns:
        Literal["orthotile", "scene"]: The type of Planet data.

    Raises:
        ValueError: If the Planet data type cannot be parsed from the file path.

    """
    # Cases for Scenes:
    # - YYYYMMDD_HHMMSS_NN_XXXX
    # - YYYYMMDD_HHMMSS_XXXX

    # Cases for Orthotiles:
    # NNNNNNN/NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX
    # NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX

    assert fpath.is_dir(), "fpath must be the parent directory!"

    ps_name_parts = fpath.stem.split("_")

    if len(ps_name_parts) == 3:
        # Must be scene or invalid
        date, time, ident = ps_name_parts
        if _is_valid_date(date, "%Y%m%d") and _is_valid_date(time, "%H%M%S") and len(ident) == 4:
            return "scene"

    if len(ps_name_parts) == 4:
        # Assume scene
        date, time, n, ident = ps_name_parts
        if _is_valid_date(date, "%Y%m%d") and _is_valid_date(time, "%H%M%S") and n.isdigit() and len(ident) == 4:
            return "scene"
        # Is not scene, assume orthotile
        chunkid, tileid, date, ident = ps_name_parts
        if chunkid.isdigit() and tileid.isdigit() and _is_valid_date(date, "%Y-%m-%d") and len(ident) == 4:
            return "orthotile"

    raise ValueError(
        f"Could not parse Planet data type from {fpath}."
        f"Expected a format of YYYYMMDD_HHMMSS_NN_XXXX or YYYYMMDD_HHMMSS_XXXX for scene, "
        "or NNNNNNN/NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX or NNNNNNN_NNNNNNN_YYYY-MM-DD_XXXX for orthotile."
        f"Got {fpath.stem} instead."
        "Please ensure that the parent directory of the file is used, instead of the file itself."
    )

search_cdse_s2_sr

search_cdse_s2_sr(
    intersects=None,
    tiles: list[str] | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
) -> dict[str, pystac.Item]

Search for Sentinel-2 scenes via STAC based on an area of interest (intersects) and date range.

Note

start_date and end_date will be concatted with a / to form a date range. Read more about the date format here: https://pystac-client.readthedocs.io/en/stable/api.html#pystac_client.Client.search

Parameters:

  • intersects (any, default: None ) –

    The geometry object to search for Sentinel-2 tiles. Can be anything implementing the __geo_interface__ protocol, such as a GeoDataFrame or a shapely geometry. If None, and tiles is also None, the search will be performed globally. If set and tiles is also set, will be ignored.

  • tiles (list[str] | None, default: None ) –

    List of MGRS tile IDs to filter the search. If set, ignores intersects parameter. Defaults to None.

  • start_date (str, default: None ) –

    Starting date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • end_date (str, default: None ) –

    Ending date in a format readable by pystac_client. If None, months and years parameters will be used for filtering if set. Defaults to None.

  • max_cloud_cover (int, default: 10 ) –

    Maximum percentage of cloud cover. Defaults to 10.

  • max_snow_cover (int, default: 10 ) –

    Maximum percentage of snow cover. Defaults to 10.

  • months (list[int] | None, default: None ) –

    List of months (1-12) to filter the search. Only used if start_date and end_date are None. Defaults to None.

  • years (list[int] | None, default: None ) –

    List of years to filter the search. Only used if start_date and end_date are None. Defaults to None.

Returns:

  • dict[str, pystac.Item]

    dict[str, Item]: A dictionary of found Sentinel-2 items as values and the s2id as keys.

Source code in darts-acquisition/src/darts_acquisition/s2/cdse_scene.py
@stopwatch("Searching for Sentinel-2 scenes in CDSE", printer=logger.debug)
def search_cdse_s2_sr(
    intersects=None,
    tiles: list[str] | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    max_cloud_cover: int | None = 10,
    max_snow_cover: int | None = 10,
    months: list[int] | None = None,
    years: list[int] | None = None,
) -> dict[str, Item]:
    """Search for Sentinel-2 scenes via STAC based on an area of interest (intersects) and date range.

    Note:
        `start_date` and `end_date` will be concatted with a `/` to form a date range.
        Read more about the date format here: https://pystac-client.readthedocs.io/en/stable/api.html#pystac_client.Client.search

    Args:
        intersects (any): The geometry object to search for Sentinel-2 tiles.
            Can be anything implementing the `__geo_interface__` protocol, such as a GeoDataFrame or a shapely geometry.
            If None, and tiles is also None, the search will be performed globally.
            If set and tiles is also set, will be ignored.
        tiles (list[str] | None, optional): List of MGRS tile IDs to filter the search.
            If set, ignores intersects parameter.
            Defaults to None.
        start_date (str): Starting date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        end_date (str): Ending date in a format readable by pystac_client.
            If None, months and years parameters will be used for filtering if set.
            Defaults to None.
        max_cloud_cover (int, optional): Maximum percentage of cloud cover. Defaults to 10.
        max_snow_cover (int, optional): Maximum percentage of snow cover. Defaults to 10.
        months (list[int] | None, optional): List of months (1-12) to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.
        years (list[int] | None, optional): List of years to filter the search.
            Only used if start_date and end_date are None.
            Defaults to None.

    Returns:
        dict[str, Item]: A dictionary of found Sentinel-2 items as values and the s2id as keys.

    """
    catalog = Client.open("https://stac.dataspace.copernicus.eu/v1/")

    if tiles is not None and intersects is not None:
        logger.warning("Both tile and intersects provided. Ignoring intersects parameter.")
        intersects = None

    cql2_filter = _build_cql2_filter(tiles, max_cloud_cover, max_snow_cover)

    if start_date is not None and end_date is not None:
        if months is not None or years is not None:
            logger.warning("Both date range and months/years filtering provided. Ignoring months/years filter.")
        logger.debug(
            f"Searching CDSE for scenes between {start_date} and {end_date} ({cql2_filter}, {type(intersects)})."
        )
        search = catalog.search(
            collections=["sentinel-2-l2a"],
            intersects=intersects,
            datetime=f"{start_date}/{end_date}",
            filter=cql2_filter,
        )
        found_items = list(search.items())
    elif months is not None or years is not None:
        if months is None:
            months = list(range(1, 13))
        if years is None:
            years = list(range(2017, 2026))
        found_items = set()
        for year in years:
            for month in months:
                search = catalog.search(
                    collections=["sentinel-2-l2a"],
                    intersects=intersects,
                    datetime=f"{year}-{month:02d}",
                    filter=cql2_filter,
                )
                found_items.update(list(search.items()))
    else:
        logger.warning("No valid date filtering provided. This may result in a too large number of scenes for CDSE.")
        search = catalog.search(
            collections=["sentinel-2-l2a"],
            intersects=intersects,
            filter=cql2_filter,
        )
        found_items = list(search.items())

    if len(found_items) == 0:
        logger.debug(
            "No Sentinel-2 items found for the given parameters:"
            f" {intersects=}, {start_date=}, {end_date=}, {max_cloud_cover=}"
        )
        return {}
    logger.debug(f"Found {len(found_items)} Sentinel-2 items in CDSE.")
    return {item.id: item for item in found_items}