From 0f2a55a1a159c7faf4b07b6eae9c6b3db27a2cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20H=C3=B6lzer?= Date: Tue, 26 Nov 2024 16:09:40 +0100 Subject: [PATCH 1/3] Refactor native It should now be much easier to make changes, since there is less code repetition. However, understandability is worse now --- darts/src/darts/__init__.py | 3 - darts/src/darts/cli.py | 6 +- darts/src/darts/legacy_pipeline/__init__.py | 6 + darts/src/darts/legacy_pipeline/legacy.py | 333 ++++++++++ .../src/darts/legacy_pipeline/legacy_fast.py | 273 ++++++++ darts/src/darts/legacy_pipeline/shared.py | 113 ++++ darts/src/darts/native.py | 620 ------------------ 7 files changed, 728 insertions(+), 626 deletions(-) create mode 100644 darts/src/darts/legacy_pipeline/__init__.py create mode 100644 darts/src/darts/legacy_pipeline/legacy.py create mode 100644 darts/src/darts/legacy_pipeline/legacy_fast.py create mode 100644 darts/src/darts/legacy_pipeline/shared.py delete mode 100644 darts/src/darts/native.py diff --git a/darts/src/darts/__init__.py b/darts/src/darts/__init__.py index d3c4297..63e6dce 100644 --- a/darts/src/darts/__init__.py +++ b/darts/src/darts/__init__.py @@ -2,7 +2,4 @@ from importlib.metadata import version -from darts.native import run_native_planet_pipeline as run_native_planet_pipeline -from darts.native import run_native_sentinel2_pipeline as run_native_sentinel2_pipeline - __version__ = version("darts-nextgen") diff --git a/darts/src/darts/cli.py b/darts/src/darts/cli.py index a3ecc8f..4891eec 100644 --- a/darts/src/darts/cli.py +++ b/darts/src/darts/cli.py @@ -9,7 +9,7 @@ from rich.console import Console from darts import __version__ -from darts.native import ( +from darts.legacy_pipeline import ( run_native_planet_pipeline, run_native_planet_pipeline_fast, run_native_sentinel2_pipeline, @@ -27,8 +27,8 @@ version=__version__, console=console, config=config_parser, - help_format="rich", - version_format="rich", + help_format="plaintext", + version_format="plaintext", ) 
pipeline_group = cyclopts.Group.create_ordered("Pipeline Commands") diff --git a/darts/src/darts/legacy_pipeline/__init__.py b/darts/src/darts/legacy_pipeline/__init__.py new file mode 100644 index 0000000..5f3417f --- /dev/null +++ b/darts/src/darts/legacy_pipeline/__init__.py @@ -0,0 +1,6 @@ +"""Legacy pipeline module.""" + +from darts.legacy_pipeline.legacy import run_native_planet_pipeline as run_native_planet_pipeline +from darts.legacy_pipeline.legacy import run_native_sentinel2_pipeline as run_native_sentinel2_pipeline +from darts.legacy_pipeline.legacy_fast import run_native_planet_pipeline_fast as run_native_planet_pipeline_fast +from darts.legacy_pipeline.legacy_fast import run_native_sentinel2_pipeline_fast as run_native_sentinel2_pipeline_fast diff --git a/darts/src/darts/legacy_pipeline/legacy.py b/darts/src/darts/legacy_pipeline/legacy.py new file mode 100644 index 0000000..374cc58 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/legacy.py @@ -0,0 +1,333 @@ +"""Legacy Pipeline without any other framework.""" + +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export + +logger = logging.getLogger(__name__) + + +def _process( + data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], + model_dir: Path, + tcvis_model_name: str, + notcvis_model_name: str, + device: Literal["cuda", "cpu", "auto"] | int | None, + ee_project: str | None, + ee_use_highvolume: bool, + patch_size: int, + overlap: int, + batch_size: int, + reflection: int, + binarization_threshold: float, + mask_erosion_size: int, + min_object_size: int, + use_quality_mask: bool, + write_model_outputs: bool, +): + # Import here to avoid long loading times when running other commands + import torch + from darts_ensemble.ensemble_v1 import EnsembleV1 + from darts_preprocessing import preprocess_legacy + + from 
darts.utils.cuda import debug_info, decide_device + from darts.utils.earthengine import init_ee + + debug_info() + device = decide_device(device) + init_ee(ee_project, ee_use_highvolume) + + ensemble = EnsembleV1( + model_dir / tcvis_model_name, + model_dir / notcvis_model_name, + device=torch.device(device), + ) + + for fpath, outpath, aqdata in data_generator: + try: + tile = preprocess_legacy(aqdata.optical, aqdata.arcticdem, aqdata.tcvis, aqdata.data_masks) + + _segment_and_export( + tile, + ensemble, + outpath, + device, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + + +def run_native_planet_pipeline( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. + + Args: + orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. 
+ tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. 
+ + Examples: + ### PS Orthotile + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSOrthoTile + └── 4372514/5790392_4372514_2022-07-16_2459 + ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml + ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif + ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif + ├── 5790392_4372514_2022-07-16_2459_metadata.json + └── 5790392_4372514_2022-07-16_2459_udm2.tif + ``` + + then the config should be + + ``` + ... + orthotiles_dir: data/input/planet/PSOrthoTile + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + ### PS Scene + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSScene + └── 20230703_194241_43_2427 + ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml + ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif + ├── 20230703_194241_43_2427_3B_udm2.tif + ├── 20230703_194241_43_2427_metadata.json + └── 20230703_194241_43_2427.json + ``` + + then the config should be + + ``` + ... 
+ scenes_dir: data/input/planet/PSScene + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + data_generator = _load_planet( + orthotiles_dir, + scenes_dir, + output_data_dir, + arcticdem_slope_vrt, + arcticdem_elevation_vrt, + tcvis_dir, + tpi_outer_radius=10, + ) + _process( + data_generator, + model_dir, + tcvis_model_name, + notcvis_model_name, + device, + ee_project, + ee_use_highvolume, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) + + +def run_native_sentinel2_pipeline( + *, + sentinel2_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. + + Args: + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis_s2native.pt". 
+ notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis_s2native.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. 
+ + Examples: + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── sentinel2 + └── 20220826T200911_20220826T200905_T17XMJ/ + ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif + └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif + ``` + + then the config should be + + ``` + ... + sentinel2_dir: data/input/sentinel2 + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + data_generator = _load_s2( + sentinel2_dir, + output_data_dir, + arcticdem_slope_vrt, + arcticdem_elevation_vrt, + tcvis_dir, + tpi_outer_radius=10, + ) + _process( + data_generator, + model_dir, + tcvis_model_name, + notcvis_model_name, + device, + ee_project, + ee_use_highvolume, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) diff --git a/darts/src/darts/legacy_pipeline/legacy_fast.py b/darts/src/darts/legacy_pipeline/legacy_fast.py new file mode 100644 index 0000000..98513e9 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/legacy_fast.py @@ -0,0 +1,273 @@ +"""Legacy Pipeline without any other framework, but a faster and improved version.""" + +import logging +from collections.abc import Generator +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export + +logger = logging.getLogger(__name__) + + +def _process_fast( + data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], + model_dir: Path, + tcvis_model_name: str, + notcvis_model_name: str, + device: Literal["cuda", "cpu", "auto"] | int | None, + ee_project: str | None, + ee_use_highvolume: bool, + tpi_outer_radius: int, + tpi_inner_radius: int, + 
patch_size: int, + overlap: int, + batch_size: int, + reflection: int, + binarization_threshold: float, + mask_erosion_size: int, + min_object_size: int, + use_quality_mask: bool, + write_model_outputs: bool, +): + # Import here to avoid long loading times when running other commands + import torch + from darts_ensemble.ensemble_v1 import EnsembleV1 + from darts_preprocessing import preprocess_legacy_fast + from dask.distributed import Client + from odc.stac import configure_rio + + from darts.utils.cuda import debug_info, decide_device + from darts.utils.earthengine import init_ee + + debug_info() + device = decide_device(device) + init_ee(ee_project, ee_use_highvolume) + + client = Client() + logger.info(f"Using Dask client: {client}") + configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) + logger.info("Configured Rasterio with Dask") + + ensemble = EnsembleV1( + model_dir / tcvis_model_name, + model_dir / notcvis_model_name, + device=torch.device(device), + ) + + for fpath, outpath, aqdata in data_generator: + try: + tile = preprocess_legacy_fast( + aqdata.optical, + aqdata.arcticdem, + aqdata.tcvis, + aqdata.data_masks, + tpi_outer_radius, + tpi_inner_radius, + device, + ) + + _segment_and_export( + tile, + ensemble, + outpath, + device, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + + +def run_native_planet_pipeline_fast( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + 
tpi_outer_radius: int = 100, + tpi_inner_radius: int = 0, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. + + Args: + orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). + Will be created and downloaded if it does not exist. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation + in m. Defaults to 100m. 
+ tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation + in m. Defaults to 0. + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. 
+ + """ + data_generator = _load_planet( + orthotiles_dir, + scenes_dir, + output_data_dir, + arcticdem_dir, + tcvis_dir, + tpi_outer_radius, + ) + _process_fast( + data_generator, + model_dir, + tcvis_model_name, + notcvis_model_name, + device, + ee_project, + ee_use_highvolume, + tpi_outer_radius, + tpi_inner_radius, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) + + +def run_native_sentinel2_pipeline_fast( + *, + sentinel2_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + tpi_outer_radius: int = 100, + tpi_inner_radius: int = 0, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. + + Args: + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). + Will be created and downloaded if it does not exist. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. 
+ tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis_s2native.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis_s2native.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation + in m. Defaults to 100m. + tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation + in m. Defaults to 0. + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. 
+ + """ + data_generator = _load_s2( + sentinel2_dir, + output_data_dir, + arcticdem_dir, + tcvis_dir, + tpi_outer_radius, + ) + _process_fast( + data_generator, + model_dir, + tcvis_model_name, + notcvis_model_name, + device, + ee_project, + ee_use_highvolume, + tpi_outer_radius, + tpi_inner_radius, + patch_size, + overlap, + batch_size, + reflection, + binarization_threshold, + mask_erosion_size, + min_object_size, + use_quality_mask, + write_model_outputs, + ) diff --git a/darts/src/darts/legacy_pipeline/shared.py b/darts/src/darts/legacy_pipeline/shared.py new file mode 100644 index 0000000..c823fd8 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/shared.py @@ -0,0 +1,113 @@ +"""Data loading for legacy Pipeline.""" + +import logging +from collections import namedtuple +from math import ceil, sqrt +from pathlib import Path +from typing import Literal + +logger = logging.getLogger(__name__) + +AquisitionData = namedtuple("AquisitionData", ["optical", "arcticdem", "tcvis", "data_masks"]) + + +def _planet_file_generator(orthotiles_dir: Path, scenes_dir: Path, output_data_dir: Path): + # Find all PlanetScope orthotiles + for fpath in orthotiles_dir.glob("*/*/"): + tile_id = fpath.parent.name + scene_id = fpath.name + outpath = output_data_dir / tile_id / scene_id + yield fpath, outpath + + # Find all PlanetScope scenes + for fpath in scenes_dir.glob("*/"): + scene_id = fpath.name + outpath = output_data_dir / scene_id + yield fpath, outpath + + +def _load_s2(sentinel2_dir: Path, output_data_dir: Path, arcticdem_dir: Path, tcvis_dir: Path, tpi_outer_radius: int): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + for fpath in sentinel2_dir.glob("*/"): + scene_id = fpath.name + outpath = output_data_dir / scene_id + try: + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, arcticdem_dir, resolution=10, 
buffer=ceil(tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + yield fpath, outpath, aqdata + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + continue + + +def _load_planet( + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + tpi_outer_radius: int, +): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + # Find all PlanetScope orthotiles + for fpath, outpath in _planet_file_generator(orthotiles_dir, scenes_dir, output_data_dir): + try: + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, arcticdem_dir, resolution=2, buffer=ceil(tpi_outer_radius / 2 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + yield fpath, outpath, aqdata + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + continue + + +def _segment_and_export( + tile, + ensemble, + outpath: Path, + device: Literal["cuda", "cpu", "auto"] | int | None, + patch_size: int, + overlap: int, + batch_size: int, + reflection: int, + binarization_threshold: float, + mask_erosion_size: int, + min_object_size: int, + use_quality_mask: bool, + write_model_outputs: bool, +): + from darts_export.inference import InferenceResultWriter + from darts_postprocessing import prepare_export + + tile = ensemble.segment_tile( + tile, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + keep_inputs=write_model_outputs, + ) + 
tile = prepare_export(tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device) + + outpath.mkdir(parents=True, exist_ok=True) + writer = InferenceResultWriter(tile) + writer.export_probabilities(outpath) + writer.export_binarized(outpath) + writer.export_polygonized(outpath) diff --git a/darts/src/darts/native.py b/darts/src/darts/native.py deleted file mode 100644 index ebd4f5d..0000000 --- a/darts/src/darts/native.py +++ /dev/null @@ -1,620 +0,0 @@ -"""Pipeline without any other framework.""" - -import logging -from math import ceil, sqrt -from pathlib import Path -from typing import Literal - -logger = logging.getLogger(__name__) - - -def planet_file_generator(orthotiles_dir: Path, scenes_dir: Path, output_data_dir: Path): - """Generate a list of files and output paths from planet scenes and orthotiles. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - - Yields: - Tuple[Path, Path]: A tuple containing the input file path and the output directory path. 
- - """ - # Find all PlanetScope orthotiles - for fpath in orthotiles_dir.glob("*/*/"): - tile_id = fpath.parent.name - scene_id = fpath.name - outpath = output_data_dir / tile_id / scene_id - yield fpath, outpath - - # Find all PlanetScope scenes - for fpath in scenes_dir.glob("*/"): - scene_id = fpath.name - outpath = output_data_dir / scene_id - yield fpath, outpath - - -def run_native_planet_pipeline( - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. 
- If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - Examples: - ### PS Orthotile - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSOrthoTile - └── 4372514/5790392_4372514_2022-07-16_2459 - ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml - ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif - ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif - ├── 5790392_4372514_2022-07-16_2459_metadata.json - └── 5790392_4372514_2022-07-16_2459_udm2.tif - ``` - - then the config should be - - ``` - ... - orthotiles_dir: data/input/planet/PSOrthoTile - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - ### PS Scene - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSScene - └── 20230703_194241_43_2427 - ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml - ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif - ├── 20230703_194241_43_2427_3B_udm2.tif - ├── 20230703_194241_43_2427_metadata.json - └── 20230703_194241_43_2427.json - ``` - - then the config should be - - ``` - ... 
- scenes_dir: data/input/planet/PSScene - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - # Import here to avoid long loading times when running other commands - import torch - from darts_acquisition.arcticdem import load_arcticdem_from_vrt - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis import load_tcvis - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - from darts_preprocessing import preprocess_legacy - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - # Find all PlanetScope orthotiles - for fpath, outpath in planet_file_generator(orthotiles_dir, scenes_dir, output_data_dir): - try: - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_from_vrt(arcticdem_slope_vrt, arcticdem_elevation_vrt, optical) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_planet_masks(fpath) - - tile = preprocess_legacy(optical, arcticdem, tcvis, data_masks) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export( - tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device - ) - - outpath.mkdir(parents=True, exist_ok=True) - writer = InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath) - except Exception as e: - logger.warning(f"Could not process folder 
'{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_planet_pipeline_fast( - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". 
- ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults 100m. - tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - """ - # Import here to avoid long loading times when running other commands - import torch - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis import load_tcvis - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - from darts_preprocessing import preprocess_legacy_fast - from dask.distributed import Client - from odc.stac import configure_rio - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - client = Client() - logger.info(f"Using Dask client: {client}") - configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) - logger.info("Configured Rasterio with Dask") - - # Find all PlanetScope orthotiles - for fpath, outpath in planet_file_generator(orthotiles_dir, scenes_dir, output_data_dir): - try: - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=2, buffer=ceil(tpi_outer_radius / 2 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_planet_masks(fpath) - - tile = preprocess_legacy_fast( - optical, - arcticdem, - tcvis, - data_masks, - tpi_outer_radius, - tpi_inner_radius, - device, - ) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export( - tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device - ) - - outpath.mkdir(parents=True, exist_ok=True) - writer = 
InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath) - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_sentinel2_pipeline( - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". 
- ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. - - Examples: - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── sentinel2 - └── 20220826T200911_20220826T200905_T17XMJ/ - ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif - └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif - ``` - - then the config should be - - ``` - ... 
- sentinel2_dir: data/input/sentinel2 - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - # Import here to avoid long loading times when running other commands - import torch - from darts_acquisition.arcticdem import load_arcticdem_from_vrt - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from darts_acquisition.tcvis import load_tcvis - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - from darts_preprocessing import preprocess_legacy - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - # Find all Sentinel 2 scenes - for fpath in sentinel2_dir.glob("*/"): - try: - scene_id = fpath.name - outpath = output_data_dir / scene_id - - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_from_vrt(arcticdem_slope_vrt, arcticdem_elevation_vrt, optical) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - - tile = preprocess_legacy(optical, arcticdem, tcvis, data_masks) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export( - tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device - ) - - outpath.mkdir(parents=True, exist_ok=True) - writer = InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath) - except Exception as e: - logger.warning(f"Could not process folder 
'{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_sentinel2_pipeline_fast( - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". 
- ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults to 100m. - tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - """ - # Import here to avoid long loading times when running other commands - import torch - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from darts_acquisition.tcvis import load_tcvis - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - from darts_preprocessing import preprocess_legacy_fast - from dask.distributed import Client - from odc.stac import configure_rio - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - client = Client() - logger.info(f"Using Dask client: {client}") - configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) - logger.info("Configured Rasterio with Dask") - - # Find all Sentinel 2 scenes - for fpath in sentinel2_dir.glob("*/"): - try: - scene_id = fpath.name - outpath = output_data_dir / scene_id - - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=10, buffer=ceil(tpi_outer_radius / 10 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - - tile = preprocess_legacy_fast( - optical, - arcticdem, - tcvis, - data_masks, - tpi_outer_radius, - tpi_inner_radius, - device, - ) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export( - tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device - ) - - outpath.mkdir(parents=True, exist_ok=True) - writer = 
InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath) - - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) From c72bb93a26ccfe5b2c97ed8c1abbac8c433edb25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20H=C3=B6lzer?= Date: Fri, 29 Nov 2024 21:14:25 +0100 Subject: [PATCH 2/3] Use dataclasses and mixins instead --- darts/src/darts/legacy_pipeline/__init__.py | 8 +- darts/src/darts/legacy_pipeline/_base.py | 188 ++++++++++ darts/src/darts/legacy_pipeline/legacy.py | 333 ------------------ .../src/darts/legacy_pipeline/legacy_fast.py | 273 -------------- darts/src/darts/legacy_pipeline/planet.py | 170 +++++++++ .../src/darts/legacy_pipeline/planet_fast.py | 115 ++++++ darts/src/darts/legacy_pipeline/s2.py | 129 +++++++ darts/src/darts/legacy_pipeline/s2_fast.py | 113 ++++++ darts/src/darts/legacy_pipeline/shared.py | 113 ------ 9 files changed, 719 insertions(+), 723 deletions(-) create mode 100644 darts/src/darts/legacy_pipeline/_base.py delete mode 100644 darts/src/darts/legacy_pipeline/legacy.py delete mode 100644 darts/src/darts/legacy_pipeline/legacy_fast.py create mode 100644 darts/src/darts/legacy_pipeline/planet.py create mode 100644 darts/src/darts/legacy_pipeline/planet_fast.py create mode 100644 darts/src/darts/legacy_pipeline/s2.py create mode 100644 darts/src/darts/legacy_pipeline/s2_fast.py delete mode 100644 darts/src/darts/legacy_pipeline/shared.py diff --git a/darts/src/darts/legacy_pipeline/__init__.py b/darts/src/darts/legacy_pipeline/__init__.py index 5f3417f..8a003b8 100644 --- a/darts/src/darts/legacy_pipeline/__init__.py +++ b/darts/src/darts/legacy_pipeline/__init__.py @@ -1,6 +1,6 @@ """Legacy pipeline module.""" -from darts.legacy_pipeline.legacy import run_native_planet_pipeline as run_native_planet_pipeline -from darts.legacy_pipeline.legacy import run_native_sentinel2_pipeline as 
run_native_sentinel2_pipeline -from darts.legacy_pipeline.legacy_fast import run_native_planet_pipeline_fast as run_native_planet_pipeline_fast -from darts.legacy_pipeline.legacy_fast import run_native_sentinel2_pipeline_fast as run_native_sentinel2_pipeline_fast +from darts.legacy_pipeline.planet import run_native_planet_pipeline as run_native_planet_pipeline +from darts.legacy_pipeline.planet_fast import run_native_planet_pipeline_fast as run_native_planet_pipeline_fast +from darts.legacy_pipeline.s2 import run_native_sentinel2_pipeline as run_native_sentinel2_pipeline +from darts.legacy_pipeline.s2_fast import run_native_sentinel2_pipeline_fast as run_native_sentinel2_pipeline_fast diff --git a/darts/src/darts/legacy_pipeline/_base.py b/darts/src/darts/legacy_pipeline/_base.py new file mode 100644 index 0000000..3079dcb --- /dev/null +++ b/darts/src/darts/legacy_pipeline/_base.py @@ -0,0 +1,188 @@ +import logging +import multiprocessing as mp +from collections import namedtuple +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +logger = logging.getLogger(__name__) + +AquisitionData = namedtuple("AquisitionData", ["optical", "arcticdem", "tcvis", "data_masks"]) + + +@dataclass +class _BasePipeline: + """Base class for all pipelines. + + This class provides the run method which is the main entry point for all pipelines. + + This class is meant to be subclassed by the specific pipelines. + These specific pipelines must implement the following methods: + + - "_path_generator" which generates the paths to the data (e.g. through Source Mixin) + - "_get_data" which loads the data for a given path + - "_preprocess" which preprocesses the data (e.g. through Processing Mixin) + + It is possible to implement these functions, by subclassing other mixins, e.g. _S2Mixin. + + The main class must be also a dataclass, to fully inherit all parameter of this class (and the mixins). 
+ """ + + output_data_dir: Path + tcvis_dir: Path + model_dir: Path + tcvis_model_name: str + notcvis_model_name: str + device: Literal["cuda", "cpu", "auto"] | int | None + ee_project: str | None + ee_use_highvolume: bool + patch_size: int + overlap: int + batch_size: int + reflection: int + binarization_threshold: float + mask_erosion_size: int + min_object_size: int + use_quality_mask: bool + write_model_outputs: bool + + # These would be the type hints for the methods that need to be implemented + # Leaving them uncommented would result in a NotImplementedError if Mixins are used + # def _path_generator(self) -> Generator[tuple[Path, Path]]: + # raise NotImplementedError + + # def _get_data(self, fpath: Path) -> AquisitionData: + # raise NotImplementedError + + # def _preprocess(self, aqdata: AquisitionData) -> xr.Dataset: + # raise NotImplementedError + + def run(self): + import torch + from darts_ensemble.ensemble_v1 import EnsembleV1 + from darts_export.inference import InferenceResultWriter + from darts_postprocessing import prepare_export + from dask.distributed import Client, LocalCluster + from odc.stac import configure_rio + + from darts.utils.cuda import debug_info, decide_device + from darts.utils.earthengine import init_ee + + debug_info() + self.device = decide_device(self.device) + init_ee(self.ee_project, self.ee_use_highvolume) + + ensemble = EnsembleV1( + self.model_dir / self.tcvis_model_name, + self.model_dir / self.notcvis_model_name, + device=torch.device(self.device), + ) + + # Init Dask stuff with a context manager + with LocalCluster(n_workers=mp.cpu_count() - 1) as cluster, Client(cluster) as client: + logger.info(f"Using Dask client: {client}") + configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) + logger.info("Configured Rasterio with Dask") + + # Iterate over all the data (_path_generator) + for fpath, outpath in self._path_generator(): + try: + aqdata = self._get_data(fpath) + tile = 
self._preprocess(aqdata) + + tile = ensemble.segment_tile( + tile, + patch_size=self.patch_size, + overlap=self.overlap, + batch_size=self.batch_size, + reflection=self.reflection, + keep_inputs=self.write_model_outputs, + ) + tile = prepare_export( + tile, + self.binarization_threshold, + self.mask_erosion_size, + self.min_object_size, + self.use_quality_mask, + self.device, + ) + + outpath.mkdir(parents=True, exist_ok=True) + writer = InferenceResultWriter(tile) + writer.export_probabilities(outpath) + writer.export_binarized(outpath) + writer.export_polygonized(outpath) + except KeyboardInterrupt: + logger.warning("Keyboard interrupt detected.\nExiting...") + break + except Exception as e: + logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") + logger.exception(e) + + +# ============================================================================= +# Processing mixins (they provide _preprocess method) +# ============================================================================= +@dataclass +class _VRTMixin: + arcticdem_slope_vrt: Path + arcticdem_elevation_vrt: Path + + def _preprocess(self, aqdata: AquisitionData): + from darts_preprocessing import preprocess_legacy + + return preprocess_legacy(aqdata.optical, aqdata.arcticdem, aqdata.tcvis, aqdata.data_masks) + + +@dataclass +class _FastMixin: + arcticdem_dir: Path + tpi_outer_radius: int + tpi_inner_radius: int + + def _preprocess(self, aqdata: AquisitionData): + from darts_preprocessing import preprocess_legacy_fast + + return preprocess_legacy_fast( + aqdata.optical, + aqdata.arcticdem, + aqdata.tcvis, + aqdata.data_masks, + self.tpi_outer_radius, + self.tpi_inner_radius, + self.device, + ) + + +# ============================================================================= +# Source mixins (they provide _path_generator method) +# ============================================================================= +@dataclass +class _PlanetMixin: + orthotiles_dir: Path + scenes_dir: 
Path + + def _path_generator(self): + # Find all PlanetScope orthotiles + for fpath in self.orthotiles_dir.glob("*/*/"): + tile_id = fpath.parent.name + scene_id = fpath.name + outpath = self.output_data_dir / tile_id / scene_id + yield fpath, outpath + + # Find all PlanetScope scenes + for fpath in self.scenes_dir.glob("*/"): + scene_id = fpath.name + outpath = self.output_data_dir / scene_id + yield fpath, outpath + + +@dataclass +class _S2Mixin: + sentinel2_dir: Path + + def _path_generator(self): + for fpath in self.sentinel2_dir.glob("*/"): + scene_id = fpath.name + outpath = self.output_data_dir / scene_id + yield fpath, outpath diff --git a/darts/src/darts/legacy_pipeline/legacy.py b/darts/src/darts/legacy_pipeline/legacy.py deleted file mode 100644 index 374cc58..0000000 --- a/darts/src/darts/legacy_pipeline/legacy.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Legacy Pipeline without any other framework.""" - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Literal - -from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export - -logger = logging.getLogger(__name__) - - -def _process( - data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], - model_dir: Path, - tcvis_model_name: str, - notcvis_model_name: str, - device: Literal["cuda", "cpu", "auto"] | int | None, - ee_project: str | None, - ee_use_highvolume: bool, - patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - # Import here to avoid long loading times when running other commands - import torch - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_preprocessing import preprocess_legacy - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = 
decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - - for fpath, outpath, aqdata in data_generator: - try: - tile = preprocess_legacy(aqdata.optical, aqdata.arcticdem, aqdata.tcvis, aqdata.data_masks) - - _segment_and_export( - tile, - ensemble, - outpath, - device, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_planet_pipeline( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. 
- tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - Examples: - ### PS Orthotile - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSOrthoTile - └── 4372514/5790392_4372514_2022-07-16_2459 - ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml - ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif - ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif - ├── 5790392_4372514_2022-07-16_2459_metadata.json - └── 5790392_4372514_2022-07-16_2459_udm2.tif - ``` - - then the config should be - - ``` - ... - orthotiles_dir: data/input/planet/PSOrthoTile - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - ### PS Scene - - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── planet - └── PSScene - └── 20230703_194241_43_2427 - ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml - ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif - ├── 20230703_194241_43_2427_3B_udm2.tif - ├── 20230703_194241_43_2427_metadata.json - └── 20230703_194241_43_2427.json - ``` - - then the config should be - - ``` - ... 
- scenes_dir: data/input/planet/PSScene - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - data_generator = _load_planet( - orthotiles_dir, - scenes_dir, - output_data_dir, - arcticdem_slope_vrt, - arcticdem_elevation_vrt, - tcvis_dir, - tpi_outer_radius=10, - ) - _process( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - - -def run_native_sentinel2_pipeline( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - output_data_dir (Path): The "output" directory. - arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. - arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". 
- notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - Examples: - Data directory structure: - - ```sh - data/input - ├── ArcticDEM - │ ├── elevation.vrt - │ ├── slope.vrt - │ ├── relative_elevation - │ │ └── 4372514_relative_elevation_100.tif - │ └── slope - │ └── 4372514_slope.tif - └── sentinel2 - └── 20220826T200911_20220826T200905_T17XMJ/ - ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif - └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif - ``` - - then the config should be - - ``` - ... - sentinel2_dir: data/input/sentinel2 - arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt - arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt - ``` - - - """ - data_generator = _load_s2( - sentinel2_dir, - output_data_dir, - arcticdem_slope_vrt, - arcticdem_elevation_vrt, - tcvis_dir, - tpi_outer_radius=10, - ) - _process( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) diff --git a/darts/src/darts/legacy_pipeline/legacy_fast.py b/darts/src/darts/legacy_pipeline/legacy_fast.py deleted file mode 100644 index 98513e9..0000000 --- a/darts/src/darts/legacy_pipeline/legacy_fast.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Legacy Pipeline without any other framework, but a faster and improved version.""" - -import logging -from collections.abc import Generator -from pathlib import Path -from typing import Literal - -from darts.legacy_pipeline.shared import AquisitionData, _load_planet, _load_s2, _segment_and_export - -logger = logging.getLogger(__name__) - - -def _process_fast( - data_generator: Generator[tuple[Path, Path, AquisitionData], None, None], - model_dir: Path, - tcvis_model_name: str, - notcvis_model_name: str, - device: Literal["cuda", "cpu", "auto"] | int | None, - ee_project: str | None, - ee_use_highvolume: bool, - tpi_outer_radius: int, - tpi_inner_radius: int, - 
patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - # Import here to avoid long loading times when running other commands - import torch - from darts_ensemble.ensemble_v1 import EnsembleV1 - from darts_preprocessing import preprocess_legacy_fast - from dask.distributed import Client - from odc.stac import configure_rio - - from darts.utils.cuda import debug_info, decide_device - from darts.utils.earthengine import init_ee - - debug_info() - device = decide_device(device) - init_ee(ee_project, ee_use_highvolume) - - client = Client() - logger.info(f"Using Dask client: {client}") - configure_rio(cloud_defaults=True, aws={"aws_unsigned": True}, client=client) - logger.info("Configured Rasterio with Dask") - - ensemble = EnsembleV1( - model_dir / tcvis_model_name, - model_dir / notcvis_model_name, - device=torch.device(device), - ) - - for fpath, outpath, aqdata in data_generator: - try: - tile = preprocess_legacy_fast( - aqdata.optical, - aqdata.arcticdem, - aqdata.tcvis, - aqdata.data_masks, - tpi_outer_radius, - tpi_inner_radius, - device, - ) - - _segment_and_export( - tile, - ensemble, - outpath, - device, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - - -def run_native_planet_pipeline_fast( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - 
tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. - tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults 100m. 
- tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - """ - data_generator = _load_planet( - orthotiles_dir, - scenes_dir, - output_data_dir, - arcticdem_dir, - tcvis_dir, - tpi_outer_radius, - ) - _process_fast( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - tpi_outer_radius, - tpi_inner_radius, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) - - -def run_native_sentinel2_pipeline_fast( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. - - Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. - arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). - Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. 
- tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". - notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". - device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. - If "cuda" take the first device (0), if int take the specified device. - If "auto" try to automatically select a free GPU (<50% memory usage). - Defaults to "cuda" if available, else "cpu". - ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if - project is defined within persistent API credentials obtained via `earthengine authenticate`. - ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). - tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation - in m. Defaults to 100m. - tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation - in m. Defaults to 0. - patch_size (int, optional): The patch size to use for inference. Defaults to 1024. - overlap (int, optional): The overlap to use for inference. Defaults to 16. - batch_size (int, optional): The batch size to use for inference. Defaults to 8. - reflection (int, optional): The reflection padding to use for inference. Defaults to 0. - binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. - mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. - Defaults to 10. - min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. - use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask - to mask the output. - write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. - Defaults to False. 
- - """ - data_generator = _load_s2( - sentinel2_dir, - output_data_dir, - arcticdem_dir, - tcvis_dir, - tpi_outer_radius, - ) - _process_fast( - data_generator, - model_dir, - tcvis_model_name, - notcvis_model_name, - device, - ee_project, - ee_use_highvolume, - tpi_outer_radius, - tpi_inner_radius, - patch_size, - overlap, - batch_size, - reflection, - binarization_threshold, - mask_erosion_size, - min_object_size, - use_quality_mask, - write_model_outputs, - ) diff --git a/darts/src/darts/legacy_pipeline/planet.py b/darts/src/darts/legacy_pipeline/planet.py new file mode 100644 index 0000000..95e2756 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/planet.py @@ -0,0 +1,170 @@ +"""Legacy pipeline for Planet data.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _PlanetMixin, _VRTMixin + + +@dataclass +class _LegacyNativePlanetPipeline(_BasePipeline, _PlanetMixin, _VRTMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + 
patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all PlanetScope scenes in the given directory and run the segmentation pipeline on them. + + Args: + orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference.
Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. + + Examples: + ### PS Orthotile + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSOrthoTile + └── 4372514/5790392_4372514_2022-07-16_2459 + ├── 5790392_4372514_2022-07-16_2459_BGRN_Analytic_metadata.xml + ├── 5790392_4372514_2022-07-16_2459_BGRN_DN_udm.tif + ├── 5790392_4372514_2022-07-16_2459_BGRN_SR.tif + ├── 5790392_4372514_2022-07-16_2459_metadata.json + └── 5790392_4372514_2022-07-16_2459_udm2.tif + ``` + + then the config should be + + ``` + ... + orthotiles_dir: data/input/planet/PSOrthoTile + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + ### PS Scene + + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── planet + └── PSScene + └── 20230703_194241_43_2427 + ├── 20230703_194241_43_2427_3B_AnalyticMS_metadata.xml + ├── 20230703_194241_43_2427_3B_AnalyticMS_SR.tif + ├── 20230703_194241_43_2427_3B_udm2.tif + ├── 20230703_194241_43_2427_metadata.json + └── 20230703_194241_43_2427.json + ``` + + then the config should be + + ``` + ... 
+ scenes_dir: data/input/planet/PSScene + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + _LegacyNativePlanetPipeline( + orthotiles_dir=orthotiles_dir, + scenes_dir=scenes_dir, + output_data_dir=output_data_dir, + arcticdem_elevation_vrt=arcticdem_elevation_vrt, + arcticdem_slope_vrt=arcticdem_slope_vrt, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/planet_fast.py b/darts/src/darts/legacy_pipeline/planet_fast.py new file mode 100644 index 0000000..9911e5d --- /dev/null +++ b/darts/src/darts/legacy_pipeline/planet_fast.py @@ -0,0 +1,115 @@ +"""Legacy pipeline for Planet data with optimized preprocessing.""" + +from dataclasses import dataclass +from math import ceil, sqrt +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _PlanetMixin + + +@dataclass +class _LegacyNativePlanetPipelineFast(_BasePipeline, _PlanetMixin, _FastMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + 
data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline_fast( + *, + orthotiles_dir: Path, + scenes_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis.pt", + notcvis_model_name: str = "RTS_v6_notcvis.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + tpi_outer_radius: int = 100, + tpi_inner_radius: int = 0, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all PlanetScope scenes in the given directory and run the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. + + Args: + orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). + Will be created and downloaded if it does not exist. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device.
+ If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation + in m. Defaults to 100m. + tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation + in m. Defaults to 0. + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False.
+ + """ + _LegacyNativePlanetPipelineFast( + orthotiles_dir=orthotiles_dir, + scenes_dir=scenes_dir, + output_data_dir=output_data_dir, + arcticdem_dir=arcticdem_dir, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + tpi_outer_radius=tpi_outer_radius, + tpi_inner_radius=tpi_inner_radius, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/s2.py b/darts/src/darts/legacy_pipeline/s2.py new file mode 100644 index 0000000..fe598e1 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/s2.py @@ -0,0 +1,129 @@ +"""Legacy pipeline for Sentinel 2 data.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _S2Mixin, _VRTMixin + + +@dataclass +class _LegacyNativeSentinel2Pipeline(_BasePipeline, _S2Mixin, _VRTMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_sentinel2_pipeline( + *, + sentinel2_dir: Path, + output_data_dir: Path, + arcticdem_slope_vrt: Path, + arcticdem_elevation_vrt: Path, + tcvis_dir: 
Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all Sentinel 2 scenes in the given directory and run the segmentation pipeline on them. + + Args: + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. + output_data_dir (Path): The "output" directory. + arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis_s2native.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis_s2native.pt". + device (Literal["cuda", "cpu", "auto"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). + Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + patch_size (int, optional): The patch size to use for inference. Defaults to 1024.
+ overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. + + Examples: + Data directory structure: + + ```sh + data/input + ├── ArcticDEM + │ ├── elevation.vrt + │ ├── slope.vrt + │ ├── relative_elevation + │ │ └── 4372514_relative_elevation_100.tif + │ └── slope + │ └── 4372514_slope.tif + └── sentinel2 + └── 20220826T200911_20220826T200905_T17XMJ/ + ├── 20220826T200911_20220826T200905_T17XMJ_SCL_clip.tif + └── 20220826T200911_20220826T200905_T17XMJ_SR_clip.tif + ``` + + then the config should be + + ``` + ... 
+ sentinel2_dir: data/input/sentinel2 + arcticdem_slope_vrt: data/input/ArcticDEM/slope.vrt + arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt + ``` + + + """ + _LegacyNativeSentinel2Pipeline( + sentinel2_dir=sentinel2_dir, + output_data_dir=output_data_dir, + arcticdem_elevation_vrt=arcticdem_elevation_vrt, + arcticdem_slope_vrt=arcticdem_slope_vrt, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/s2_fast.py b/darts/src/darts/legacy_pipeline/s2_fast.py new file mode 100644 index 0000000..b45e197 --- /dev/null +++ b/darts/src/darts/legacy_pipeline/s2_fast.py @@ -0,0 +1,113 @@ +"""Legacy pipeline for Sentinel 2 data with optimized preprocessing.""" + +from dataclasses import dataclass +from math import ceil, sqrt +from pathlib import Path +from typing import Literal + +from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _S2Mixin + + +@dataclass +class _LegacyNativeSentinel2PipelineFast(_BasePipeline, _S2Mixin, _FastMixin): + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata 
= AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_sentinel2_pipeline_fast( + *, + sentinel2_dir: Path, + output_data_dir: Path, + arcticdem_dir: Path, + tcvis_dir: Path, + model_dir: Path, + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", + device: Literal["cuda", "cpu", "auto"] | int | None = None, + ee_project: str | None = None, + ee_use_highvolume: bool = True, + tpi_outer_radius: int = 100, + tpi_inner_radius: int = 0, + patch_size: int = 1024, + overlap: int = 16, + batch_size: int = 8, + reflection: int = 0, + binarization_threshold: float = 0.5, + mask_erosion_size: int = 10, + min_object_size: int = 32, + use_quality_mask: bool = False, + write_model_outputs: bool = False, +): + """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. + + Args: + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. + scenes_dir (Path): The directory containing the PlanetScope scenes. + output_data_dir (Path): The "output" directory. + arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). + Will be created and downloaded if it does not exist. + tcvis_dir (Path): The directory containing the TCVis data. + model_dir (Path): The path to the models to use for segmentation. + tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". + notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". + device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. + If "cuda" take the first device (0), if int take the specified device. + If "auto" try to automatically select a free GPU (<50% memory usage). 
+ Defaults to "cuda" if available, else "cpu". + ee_project (str, optional): The Earth Engine project ID or number to use. May be omitted if + project is defined within persistent API credentials obtained via `earthengine authenticate`. + ee_use_highvolume (bool, optional): Whether to use the high volume server (https://earthengine-highvolume.googleapis.com). + tpi_outer_radius (int, optional): The outer radius of the annulus kernel for the tpi calculation + in m. Defaults to 100m. + tpi_inner_radius (int, optional): The inner radius of the annulus kernel for the tpi calculation + in m. Defaults to 0. + patch_size (int, optional): The patch size to use for inference. Defaults to 1024. + overlap (int, optional): The overlap to use for inference. Defaults to 16. + batch_size (int, optional): The batch size to use for inference. Defaults to 8. + reflection (int, optional): The reflection padding to use for inference. Defaults to 0. + binarization_threshold (float, optional): The threshold to binarize the probabilities. Defaults to 0.5. + mask_erosion_size (int, optional): The size of the disk to use for mask erosion and the edge-cropping. + Defaults to 10. + min_object_size (int, optional): The minimum object size to keep in pixel. Defaults to 32. + use_quality_mask (bool, optional): Whether to use the "quality" mask instead of the "valid" mask + to mask the output. + write_model_outputs (bool, optional): Also save the model outputs, not only the ensemble result. + Defaults to False. 
+ + """ + _LegacyNativeSentinel2PipelineFast( + sentinel2_dir=sentinel2_dir, + output_data_dir=output_data_dir, + arcticdem_dir=arcticdem_dir, + tcvis_dir=tcvis_dir, + model_dir=model_dir, + tcvis_model_name=tcvis_model_name, + notcvis_model_name=notcvis_model_name, + device=device, + ee_project=ee_project, + ee_use_highvolume=ee_use_highvolume, + tpi_outer_radius=tpi_outer_radius, + tpi_inner_radius=tpi_inner_radius, + patch_size=patch_size, + overlap=overlap, + batch_size=batch_size, + reflection=reflection, + binarization_threshold=binarization_threshold, + mask_erosion_size=mask_erosion_size, + min_object_size=min_object_size, + use_quality_mask=use_quality_mask, + write_model_outputs=write_model_outputs, + ).run() diff --git a/darts/src/darts/legacy_pipeline/shared.py b/darts/src/darts/legacy_pipeline/shared.py deleted file mode 100644 index c823fd8..0000000 --- a/darts/src/darts/legacy_pipeline/shared.py +++ /dev/null @@ -1,113 +0,0 @@ -"""Data loading for legacy Pipeline.""" - -import logging -from collections import namedtuple -from math import ceil, sqrt -from pathlib import Path -from typing import Literal - -logger = logging.getLogger(__name__) - -AquisitionData = namedtuple("AquisitionData", ["optical", "arcticdem", "tcvis", "data_masks"]) - - -def _planet_file_generator(orthotiles_dir: Path, scenes_dir: Path, output_data_dir: Path): - # Find all PlanetScope orthotiles - for fpath in orthotiles_dir.glob("*/*/"): - tile_id = fpath.parent.name - scene_id = fpath.name - outpath = output_data_dir / tile_id / scene_id - yield fpath, outpath - - # Find all PlanetScope scenes - for fpath in scenes_dir.glob("*/"): - scene_id = fpath.name - outpath = output_data_dir / scene_id - yield fpath, outpath - - -def _load_s2(sentinel2_dir: Path, output_data_dir: Path, arcticdem_dir: Path, tcvis_dir: Path, tpi_outer_radius: int): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from 
darts_acquisition.tcvis import load_tcvis - - for fpath in sentinel2_dir.glob("*/"): - scene_id = fpath.name - outpath = output_data_dir / scene_id - try: - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=10, buffer=ceil(tpi_outer_radius / 10 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - yield fpath, outpath, aqdata - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - continue - - -def _load_planet( - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - tpi_outer_radius: int, -): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis import load_tcvis - - # Find all PlanetScope orthotiles - for fpath, outpath in _planet_file_generator(orthotiles_dir, scenes_dir, output_data_dir): - try: - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, arcticdem_dir, resolution=2, buffer=ceil(tpi_outer_radius / 2 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, tcvis_dir) - data_masks = load_planet_masks(fpath) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - yield fpath, outpath, aqdata - except Exception as e: - logger.warning(f"Could not process folder '{fpath.resolve()}'.\nSkipping...") - logger.exception(e) - continue - - -def _segment_and_export( - tile, - ensemble, - outpath: Path, - device: Literal["cuda", "cpu", "auto"] | int | None, - patch_size: int, - overlap: int, - batch_size: int, - reflection: int, - binarization_threshold: float, - mask_erosion_size: int, - min_object_size: int, - use_quality_mask: bool, - write_model_outputs: bool, -): - from 
darts_export.inference import InferenceResultWriter - from darts_postprocessing import prepare_export - - tile = ensemble.segment_tile( - tile, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - keep_inputs=write_model_outputs, - ) - tile = prepare_export(tile, binarization_threshold, mask_erosion_size, min_object_size, use_quality_mask, device) - - outpath.mkdir(parents=True, exist_ok=True) - writer = InferenceResultWriter(tile) - writer.export_probabilities(outpath) - writer.export_binarized(outpath) - writer.export_polygonized(outpath) From ea19c05bcfbd0594bd03e35abbb983be1406819d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20H=C3=B6lzer?= Date: Tue, 3 Dec 2024 11:59:26 +0100 Subject: [PATCH 3/3] Reduce parameter duplication --- darts/src/darts/legacy_pipeline/_base.py | 73 ++++++------ darts/src/darts/legacy_pipeline/planet.py | 96 +++++----------- .../src/darts/legacy_pipeline/planet_fast.py | 106 ++++++----------- darts/src/darts/legacy_pipeline/s2.py | 94 +++++---------- darts/src/darts/legacy_pipeline/s2_fast.py | 107 ++++++------------ 5 files changed, 163 insertions(+), 313 deletions(-) diff --git a/darts/src/darts/legacy_pipeline/_base.py b/darts/src/darts/legacy_pipeline/_base.py index 3079dcb..110c534 100644 --- a/darts/src/darts/legacy_pipeline/_base.py +++ b/darts/src/darts/legacy_pipeline/_base.py @@ -1,6 +1,7 @@ import logging import multiprocessing as mp from collections import namedtuple +from collections.abc import Generator from dataclasses import dataclass from pathlib import Path from typing import Literal @@ -12,7 +13,7 @@ @dataclass class _BasePipeline: - """Base class for all pipelines. + """Base class for all legacy pipelines. This class provides the run method which is the main entry point for all pipelines. @@ -28,34 +29,32 @@ class _BasePipeline: The main class must be also a dataclass, to fully inherit all parameter of this class (and the mixins). 
""" - output_data_dir: Path - tcvis_dir: Path - model_dir: Path - tcvis_model_name: str - notcvis_model_name: str - device: Literal["cuda", "cpu", "auto"] | int | None - ee_project: str | None - ee_use_highvolume: bool - patch_size: int - overlap: int - batch_size: int - reflection: int - binarization_threshold: float - mask_erosion_size: int - min_object_size: int - use_quality_mask: bool - write_model_outputs: bool - - # These would be the type hints for the methods that need to be implemented - # Leaving them uncommented would result in a NotImplementedError if Mixins are used - # def _path_generator(self) -> Generator[tuple[Path, Path]]: - # raise NotImplementedError - - # def _get_data(self, fpath: Path) -> AquisitionData: - # raise NotImplementedError - - # def _preprocess(self, aqdata: AquisitionData) -> xr.Dataset: - # raise NotImplementedError + output_data_dir: Path = Path("data/output") + tcvis_dir: Path = Path("data/download/tcvis") + model_dir: Path = Path("models") + tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt" + notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt" + device: Literal["cuda", "cpu", "auto"] | int | None = None + ee_project: str | None = None + ee_use_highvolume: bool = True + patch_size: int = 1024 + overlap: int = 256 + batch_size: int = 8 + reflection: int = 0 + binarization_threshold: float = 0.5 + mask_erosion_size: int = 10 + min_object_size: int = 32 + use_quality_mask: bool = False + write_model_outputs: bool = False + + def _path_generator(self) -> Generator[tuple[Path, Path]]: + raise NotImplementedError + + def _get_data(self, fpath: Path) -> AquisitionData: + raise NotImplementedError + + def _preprocess(self, aqdata: AquisitionData): + raise NotImplementedError def run(self): import torch @@ -125,8 +124,8 @@ def run(self): # ============================================================================= @dataclass class _VRTMixin: - arcticdem_slope_vrt: Path - arcticdem_elevation_vrt: Path + arcticdem_slope_vrt: Path = 
Path("data/input/ArcticDEM/slope.vrt") + arcticdem_elevation_vrt: Path = Path("data/input/ArcticDEM/elevation.vrt") def _preprocess(self, aqdata: AquisitionData): from darts_preprocessing import preprocess_legacy @@ -136,9 +135,9 @@ def _preprocess(self, aqdata: AquisitionData): @dataclass class _FastMixin: - arcticdem_dir: Path - tpi_outer_radius: int - tpi_inner_radius: int + arcticdem_dir: Path = Path("data/download/arcticdem") + tpi_outer_radius: int = 100 + tpi_inner_radius: int = 0 def _preprocess(self, aqdata: AquisitionData): from darts_preprocessing import preprocess_legacy_fast @@ -159,8 +158,8 @@ def _preprocess(self, aqdata: AquisitionData): # ============================================================================= @dataclass class _PlanetMixin: - orthotiles_dir: Path - scenes_dir: Path + orthotiles_dir: Path = Path("data/input/planet/PSOrthoTile") + scenes_dir: Path = Path("data/input/planet/PSScene") def _path_generator(self): # Find all PlanetScope orthotiles @@ -179,7 +178,7 @@ def _path_generator(self): @dataclass class _S2Mixin: - sentinel2_dir: Path + sentinel2_dir: Path = Path("data/input/sentinel2") def _path_generator(self): for fpath in self.sentinel2_dir.glob("*/"): diff --git a/darts/src/darts/legacy_pipeline/planet.py b/darts/src/darts/legacy_pipeline/planet.py index 95e2756..8a55512 100644 --- a/darts/src/darts/legacy_pipeline/planet.py +++ b/darts/src/darts/legacy_pipeline/planet.py @@ -2,60 +2,29 @@ from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Annotated + +from cyclopts import Parameter from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _PlanetMixin, _VRTMixin @dataclass -class _LegacyNativePlanetPipeline(_BasePipeline, _PlanetMixin, _VRTMixin): - def _get_data(self, fpath: Path): - from darts_acquisition.arcticdem import load_arcticdem_from_vrt - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis 
import load_tcvis - - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) - tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) - data_masks = load_planet_masks(fpath) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - return aqdata - - -def run_native_planet_pipeline( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. +class LegacyNativePlanetPipeline(_PlanetMixin, _VRTMixin, _BasePipeline): + """Pipeline for Planet data. Args: orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + Defaults to Path("data/input/planet/PSOrthoTile"). scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. + Defaults to Path("data/input/planet/PSScene"). + output_data_dir (Path): The "output" directory. Defaults to Path("data/output"). arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + Defaults to Path("data/input/ArcticDEM/slope.vrt"). arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. 
+ Defaults to Path("data/input/ArcticDEM/elevation.vrt"). + tcvis_dir (Path): The directory containing the TCVis data. Defaults to Path("data/download/tcvis"). + model_dir (Path): The path to the models to use for segmentation. Defaults to Path("models"). tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. @@ -143,28 +112,21 @@ def run_native_planet_pipeline( arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt ``` - """ - _LegacyNativePlanetPipeline( - orthotiles_dir=orthotiles_dir, - scenes_dir=scenes_dir, - output_data_dir=output_data_dir, - arcticdem_elevation_vrt=arcticdem_elevation_vrt, - arcticdem_slope_vrt=arcticdem_slope_vrt, - tcvis_dir=tcvis_dir, - model_dir=model_dir, - tcvis_model_name=tcvis_model_name, - notcvis_model_name=notcvis_model_name, - device=device, - ee_project=ee_project, - ee_use_highvolume=ee_use_highvolume, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - binarization_threshold=binarization_threshold, - mask_erosion_size=mask_erosion_size, - min_object_size=min_object_size, - use_quality_mask=use_quality_mask, - write_model_outputs=write_model_outputs, - ).run() + + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline(*, 
pipeline: Annotated[LegacyNativePlanetPipeline, Parameter("*")]): + """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them.""" + pipeline.run() diff --git a/darts/src/darts/legacy_pipeline/planet_fast.py b/darts/src/darts/legacy_pipeline/planet_fast.py index 9911e5d..ea2a0d2 100644 --- a/darts/src/darts/legacy_pipeline/planet_fast.py +++ b/darts/src/darts/legacy_pipeline/planet_fast.py @@ -3,65 +3,28 @@ from dataclasses import dataclass from math import ceil, sqrt from pathlib import Path -from typing import Literal +from typing import Annotated + +from cyclopts import Parameter from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _PlanetMixin @dataclass -class _LegacyNativePlanetPipelineFast(_BasePipeline, _PlanetMixin, _FastMixin): - def _get_data(self, fpath: Path): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.planet import load_planet_masks, load_planet_scene - from darts_acquisition.tcvis import load_tcvis - - optical = load_planet_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) - data_masks = load_planet_masks(fpath) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - return aqdata - - -def run_native_planet_pipeline_fast( - *, - orthotiles_dir: Path, - scenes_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis.pt", - notcvis_model_name: str = "RTS_v6_notcvis.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, 
- mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. +class LegacyNativePlanetPipelineFast(_FastMixin, _PlanetMixin, _BasePipeline): + """Pipeline for Planet data with optimized preprocessing. Args: orthotiles_dir (Path): The directory containing the PlanetScope orthotiles. + Defaults to Path("data/input/planet/PSOrthoTile"). scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. + Defaults to Path("data/input/planet/PSScene"). + output_data_dir (Path): The "output" directory. Defaults to Path("data/output"). arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. + Defaults to Path("data/download/arcticdem"). + tcvis_dir (Path): The directory containing the TCVis data. Defaults to Path("data/download/tcvis"). + model_dir (Path): The path to the models to use for segmentation. Defaults to Path("models"). tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. @@ -89,27 +52,26 @@ def run_native_planet_pipeline_fast( Defaults to False. 
""" - _LegacyNativePlanetPipelineFast( - orthotiles_dir=orthotiles_dir, - scenes_dir=scenes_dir, - output_data_dir=output_data_dir, - arcticdem_dir=arcticdem_dir, - tcvis_dir=tcvis_dir, - model_dir=model_dir, - tcvis_model_name=tcvis_model_name, - notcvis_model_name=notcvis_model_name, - device=device, - ee_project=ee_project, - ee_use_highvolume=ee_use_highvolume, - tpi_outer_radius=tpi_outer_radius, - tpi_inner_radius=tpi_inner_radius, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - binarization_threshold=binarization_threshold, - mask_erosion_size=mask_erosion_size, - min_object_size=min_object_size, - use_quality_mask=use_quality_mask, - write_model_outputs=write_model_outputs, - ).run() + + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.planet import load_planet_masks, load_planet_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_planet_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_planet_masks(fpath) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_planet_pipeline_fast(*, pipeline: Annotated[LegacyNativePlanetPipelineFast, Parameter("*")]): + """Search for all PlanetScope scenes in the given directory and runs the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. 
+ + """ + pipeline.run() diff --git a/darts/src/darts/legacy_pipeline/s2.py b/darts/src/darts/legacy_pipeline/s2.py index fe598e1..891ff7f 100644 --- a/darts/src/darts/legacy_pipeline/s2.py +++ b/darts/src/darts/legacy_pipeline/s2.py @@ -2,58 +2,26 @@ from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Annotated + +from cyclopts import Parameter from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _S2Mixin, _VRTMixin @dataclass -class _LegacyNativeSentinel2Pipeline(_BasePipeline, _S2Mixin, _VRTMixin): - def _get_data(self, fpath: Path): - from darts_acquisition.arcticdem import load_arcticdem_from_vrt - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from darts_acquisition.tcvis import load_tcvis - - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) - tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - return aqdata - - -def run_native_sentinel2_pipeline( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_slope_vrt: Path, - arcticdem_elevation_vrt: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them. 
+class LegacyNativeSentinel2Pipeline(_S2Mixin, _VRTMixin, _BasePipeline): + """Pipeline for Sentinel 2 data. Args: - sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. - output_data_dir (Path): The "output" directory. + sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. Defaults to Path("data/input/sentinel2"). + output_data_dir (Path): The "output" directory. Defaults to Path("data/output"). arcticdem_slope_vrt (Path): The path to the ArcticDEM slope VRT file. + Defaults to Path("data/input/ArcticDEM/slope.vrt"). arcticdem_elevation_vrt (Path): The path to the ArcticDEM elevation VRT file. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. + Defaults to Path("data/input/ArcticDEM/elevation.vrt"). + tcvis_dir (Path): The directory containing the TCVis data. Defaults to Path("data/download/tcvis"). + model_dir (Path): The path to the models to use for segmentation. Defaults to Path("models"). tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. 
@@ -103,27 +71,21 @@ def run_native_sentinel2_pipeline( arcticdem_elevation_vrt: data/input/ArcticDEM/elevation.vrt ``` - """ - _LegacyNativeSentinel2Pipeline( - sentinel2_dir=sentinel2_dir, - output_data_dir=output_data_dir, - arcticdem_elevation_vrt=arcticdem_elevation_vrt, - arcticdem_slope_vrt=arcticdem_slope_vrt, - tcvis_dir=tcvis_dir, - model_dir=model_dir, - tcvis_model_name=tcvis_model_name, - notcvis_model_name=notcvis_model_name, - device=device, - ee_project=ee_project, - ee_use_highvolume=ee_use_highvolume, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - binarization_threshold=binarization_threshold, - mask_erosion_size=mask_erosion_size, - min_object_size=min_object_size, - use_quality_mask=use_quality_mask, - write_model_outputs=write_model_outputs, - ).run() + + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_from_vrt + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_from_vrt(self.arcticdem_slope_vrt, self.arcticdem_elevation_vrt, optical) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_sentinel2_pipeline(*, pipeline: Annotated[LegacyNativeSentinel2Pipeline, Parameter("*")]): + """Search for all Sentinel scenes in the given directory and runs the segmentation pipeline on them.""" + pipeline.run() diff --git a/darts/src/darts/legacy_pipeline/s2_fast.py b/darts/src/darts/legacy_pipeline/s2_fast.py index b45e197..8660652 100644 --- a/darts/src/darts/legacy_pipeline/s2_fast.py +++ b/darts/src/darts/legacy_pipeline/s2_fast.py @@ -3,64 +3,26 @@ from dataclasses import dataclass from math import ceil, sqrt from pathlib import Path -from typing import Literal +from typing import 
Annotated + +from cyclopts import Parameter from darts.legacy_pipeline._base import AquisitionData, _BasePipeline, _FastMixin, _S2Mixin @dataclass -class _LegacyNativeSentinel2PipelineFast(_BasePipeline, _S2Mixin, _FastMixin): - def _get_data(self, fpath: Path): - from darts_acquisition.arcticdem import load_arcticdem_tile - from darts_acquisition.s2 import load_s2_masks, load_s2_scene - from darts_acquisition.tcvis import load_tcvis - - optical = load_s2_scene(fpath) - arcticdem = load_arcticdem_tile( - optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) - ) - tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) - data_masks = load_s2_masks(fpath, optical.odc.geobox) - aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) - return aqdata - - -def run_native_sentinel2_pipeline_fast( - *, - sentinel2_dir: Path, - output_data_dir: Path, - arcticdem_dir: Path, - tcvis_dir: Path, - model_dir: Path, - tcvis_model_name: str = "RTS_v6_tcvis_s2native.pt", - notcvis_model_name: str = "RTS_v6_notcvis_s2native.pt", - device: Literal["cuda", "cpu", "auto"] | int | None = None, - ee_project: str | None = None, - ee_use_highvolume: bool = True, - tpi_outer_radius: int = 100, - tpi_inner_radius: int = 0, - patch_size: int = 1024, - overlap: int = 16, - batch_size: int = 8, - reflection: int = 0, - binarization_threshold: float = 0.5, - mask_erosion_size: int = 10, - min_object_size: int = 32, - use_quality_mask: bool = False, - write_model_outputs: bool = False, -): - """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. - - Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. +class LegacyNativeSentinel2PipelineFast(_FastMixin, _S2Mixin, _BasePipeline): + """Pipeline for Sentinel 2 data with optimized preprocessing. Args: sentinel2_dir (Path): The directory containing the Sentinel 2 scenes. 
- scenes_dir (Path): The directory containing the PlanetScope scenes. - output_data_dir (Path): The "output" directory. + Defaults to Path("data/input/sentinel2"). + output_data_dir (Path): The "output" directory. Defaults to Path("data/output"). arcticdem_dir (Path): The directory containing the ArcticDEM data (the datacube and the extent files). Will be created and downloaded if it does not exist. - tcvis_dir (Path): The directory containing the TCVis data. - model_dir (Path): The path to the models to use for segmentation. + Defaults to Path("data/download/arcticdem"). + tcvis_dir (Path): The directory containing the TCVis data. Defaults to Path("data/download/tcvis"). + model_dir (Path): The path to the models to use for segmentation. Defaults to Path("models"). tcvis_model_name (str, optional): The name of the model to use for TCVis. Defaults to "RTS_v6_tcvis.pt". notcvis_model_name (str, optional): The name of the model to use for not TCVis. Defaults to "RTS_v6_notcvis.pt". device (Literal["cuda", "cpu"] | int, optional): The device to run the model on. @@ -88,26 +50,29 @@ def run_native_sentinel2_pipeline_fast( Defaults to False. 
""" - _LegacyNativeSentinel2PipelineFast( - sentinel2_dir=sentinel2_dir, - output_data_dir=output_data_dir, - arcticdem_dir=arcticdem_dir, - tcvis_dir=tcvis_dir, - model_dir=model_dir, - tcvis_model_name=tcvis_model_name, - notcvis_model_name=notcvis_model_name, - device=device, - ee_project=ee_project, - ee_use_highvolume=ee_use_highvolume, - tpi_outer_radius=tpi_outer_radius, - tpi_inner_radius=tpi_inner_radius, - patch_size=patch_size, - overlap=overlap, - batch_size=batch_size, - reflection=reflection, - binarization_threshold=binarization_threshold, - mask_erosion_size=mask_erosion_size, - min_object_size=min_object_size, - use_quality_mask=use_quality_mask, - write_model_outputs=write_model_outputs, - ).run() + + def _get_data(self, fpath: Path): + from darts_acquisition.arcticdem import load_arcticdem_tile + from darts_acquisition.s2 import load_s2_masks, load_s2_scene + from darts_acquisition.tcvis import load_tcvis + + optical = load_s2_scene(fpath) + arcticdem = load_arcticdem_tile( + optical.odc.geobox, self.arcticdem_dir, resolution=10, buffer=ceil(self.tpi_outer_radius / 10 * sqrt(2)) + ) + tcvis = load_tcvis(optical.odc.geobox, self.tcvis_dir) + data_masks = load_s2_masks(fpath, optical.odc.geobox) + aqdata = AquisitionData(optical, arcticdem, tcvis, data_masks) + return aqdata + + +def run_native_sentinel2_pipeline_fast(*, pipeline: Annotated[LegacyNativeSentinel2PipelineFast, Parameter("*")]): + """Search for all Sentinel 2 scenes in the given directory and runs the segmentation pipeline on them. + + Loads the ArcticDEM from a datacube instead of VRT which is a lot faster and does not need manual preprocessing. + + Args: + pipeline (LegacyNativeSentinel2PipelineFast): The pipeline to run. + + """ + pipeline.run()