From 5ab4c4bde6c7d86e4f4abd3e19036ec7954b72f3 Mon Sep 17 00:00:00 2001 From: James Fisher <85769594+jamesfisher-geo@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:13:30 -0500 Subject: [PATCH] Aggregation Bugfix and Documentation Update (#314) **Related Issue(s):** - #290 **Description:** Includes a bugfix for Elasticsearch aggregation: the `indices()` function was only checking if the input was None, but in POST requests the input is an empty list (`[]`). This led some aggregations to search through all indices in an Elasticsearch cluster, not just the items indices. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --- CHANGELOG.md | 2 + README.md | 104 +---------------- docs/mkdocs.yml | 1 + docs/src/aggregation.md | 105 ++++++++++++++++++ .../elasticsearch/database_logic.py | 2 +- 5 files changed, 110 insertions(+), 104 deletions(-) create mode 100644 docs/src/aggregation.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c3994220..95c427a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +- Aggregation ElasticSearch `total_count` bugfix, moved aggregation text to docs. [#314](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/314) + ## [v3.2.0] - 2024-10-09 ### Added diff --git a/README.md b/README.md index 6ec94b72..0ff4f254 100644 --- a/README.md +++ b/README.md @@ -279,111 +279,9 @@ The modified Items with lowercase identifiers will now be visible to users acces Authentication is an optional feature that can be enabled through `Route Dependencies` examples can be found and a more detailed explanation in [examples/auth](examples/auth). 
- ## Aggregation -Sfeos supports the STAC API [Aggregation Extension](https://github.com/stac-api-extensions/aggregation). This enables geospatial aggregation of points and geometries, as well as frequency distribution aggregation of any other property including dates. Aggregations can be defined at the root Catalog level (`/aggregations`) and at the Collection level (`//aggregations`). The `/aggregate` route also fully supports base search and the STAC API [Filter Extension](https://github.com/stac-api-extensions/filter). Any query made with `/search` may also be executed with `/aggregate`, provided that the relevant aggregation fields are available, - - -A field named `aggregations` should be added to the Collection object for the collection for which the aggregations are available, for example: - -```json -"aggregations": [ - { - "name": "total_count", - "data_type": "integer" - }, - { - "name": "datetime_max", - "data_type": "datetime" - }, - { - "name": "datetime_min", - "data_type": "datetime" - }, - { - "name": "datetime_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "datetime" - }, - { - "name": "sun_elevation_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "numeric" - }, - { - "name": "platform_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - }, - { - "name": "sun_azimuth_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "numeric" - }, - { - "name": "off_nadir_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "numeric" - }, - { - "name": "cloud_cover_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "numeric" - }, - { - "name": "grid_code_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - }, - { - "name": "centroid_geohash_grid_frequency", - 
"data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - }, - { - "name": "centroid_geohex_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - }, - { - "name": "centroid_geotile_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - }, - { - "name": "geometry_geohash_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "numeric" - }, - { - "name": "geometry_geotile_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string" - } -] - ``` - -Available aggregations are: - -- total_count (count of total items) -- collection_frequency (Item `collection` field) -- platform_frequency (Item.Properties.platform) -- cloud_cover_frequency (Item.Properties.eo:cloud_cover) -- datetime_frequency (Item.Properties.datetime, monthly interval) -- datetime_min (earliest Item.Properties.datetime) -- datetime_max (latest Item.Properties.datetime) -- sun_elevation_frequency (Item.Properties.view:sun_elevation) -- sun_azimuth_frequency (Item.Properties.view:sun_azimuth) -- off_nadir_frequency (Item.Properties.view:off_nadir) -- grid_code_frequency (Item.Properties.grid:code) -- centroid_geohash_grid_frequency ([geohash grid](https://opensearch.org/docs/latest/aggregations/bucket/geohash-grid/) on Item.Properties.proj:centroid) -- centroid_geohex_grid_frequency ([geohex grid](https://opensearch.org/docs/latest/aggregations/bucket/geohex-grid/) on Item.Properties.proj:centroid) -- centroid_geotile_grid_frequency (geotile on Item.Properties.proj:centroid) -- geometry_geohash_grid_frequency ([geohash grid](https://opensearch.org/docs/latest/aggregations/bucket/geohash-grid/) on Item.geometry) -- geometry_geotile_grid_frequency ([geotile grid](https://opensearch.org/docs/latest/aggregations/bucket/geotile-grid/) on Item.geometry) - -Support for additional fields and new 
aggregations can be added in the associated `database_logic.py` file. +Aggregation of points and geometries, as well as frequency distribution aggregation of any other property including dates, is supported in stac-fastapi-elasticsearch-opensearch. Aggregations can be defined at the root Catalog level (`/aggregations`) and at the Collection level (`/collections/{collection_id}/aggregations`). Details for supported aggregations can be found at [./docs/src/aggregation.md](./docs/src/aggregation.md). ## Rate Limiting diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5968ffa6..67764805 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -52,6 +52,7 @@ nav: - session: api/stac_fastapi/core/session.md - utilities: api/stac_fastapi/core/utilities.md - version: api/stac_fastapi/core/version.md + - Aggregation: "aggregation.md" - Development - Contributing: "contributing.md" - Release Notes: "release-notes.md" diff --git a/docs/src/aggregation.md b/docs/src/aggregation.md new file mode 100644 index 00000000..be09fa40 --- /dev/null +++ b/docs/src/aggregation.md @@ -0,0 +1,105 @@ +## Aggregation + +Stac-fastapi-elasticsearch-opensearch supports the STAC API [Aggregation Extension](https://github.com/stac-api-extensions/aggregation). This enables aggregation of points and geometries, as well as frequency distribution aggregation of any other property including dates. Aggregations can be defined at the root Catalog level (`/aggregations`) and at the Collection level (`/collections/{collection_id}/aggregations`). The [Filter Extension](https://github.com/stac-api-extensions/filter) is also fully supported, enabling aggregated returns of search queries. 
Any query made with `/search` may also be executed with `/aggregate`, provided that the relevant aggregation fields are available. + +A field named `aggregations` should be added to the Collection object for the collection for which the aggregations are available (see the example JSON at the end of this page). + +Available aggregations are: + +- total_count (count of total items) +- collection_frequency (Item `collection` field) +- platform_frequency (Item.Properties.platform) +- cloud_cover_frequency (Item.Properties.eo:cloud_cover) +- datetime_frequency (Item.Properties.datetime, monthly interval) +- datetime_min (earliest Item.Properties.datetime) +- datetime_max (latest Item.Properties.datetime) +- sun_elevation_frequency (Item.Properties.view:sun_elevation) +- sun_azimuth_frequency (Item.Properties.view:sun_azimuth) +- off_nadir_frequency (Item.Properties.view:off_nadir) +- grid_code_frequency (Item.Properties.grid:code) +- centroid_geohash_grid_frequency ([geohash grid](https://opensearch.org/docs/latest/aggregations/bucket/geohash-grid/) on Item.Properties.proj:centroid) +- centroid_geohex_grid_frequency ([geohex grid](https://opensearch.org/docs/latest/aggregations/bucket/geohex-grid/) on Item.Properties.proj:centroid) +- centroid_geotile_grid_frequency (geotile on Item.Properties.proj:centroid) +- geometry_geohash_grid_frequency ([geohash grid](https://opensearch.org/docs/latest/aggregations/bucket/geohash-grid/) on Item.geometry) +- geometry_geotile_grid_frequency ([geotile grid](https://opensearch.org/docs/latest/aggregations/bucket/geotile-grid/) on Item.geometry) + +Support for additional fields and new aggregations can be added in the [OpenSearch database_logic.py](../../stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py) and [ElasticSearch database_logic.py](../../stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py) files. 
+ +```json +"aggregations": [ + { + "name": "total_count", + "data_type": "integer" + }, + { + "name": "datetime_max", + "data_type": "datetime" + }, + { + "name": "datetime_min", + "data_type": "datetime" + }, + { + "name": "datetime_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "datetime" + }, + { + "name": "sun_elevation_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "numeric" + }, + { + "name": "platform_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + }, + { + "name": "sun_azimuth_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "numeric" + }, + { + "name": "off_nadir_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "numeric" + }, + { + "name": "cloud_cover_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "numeric" + }, + { + "name": "grid_code_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + }, + { + "name": "centroid_geohash_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + }, + { + "name": "centroid_geohex_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + }, + { + "name": "centroid_geotile_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + }, + { + "name": "geometry_geohash_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "numeric" + }, + { + "name": "geometry_geotile_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string" + } +] + ``` + + diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py 
b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index da6d6880..cd723cfb 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -168,7 +168,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: Returns: A string of comma-separated index names. If `collection_ids` is None, returns the default indices. """ - if collection_ids is None: + if collection_ids is None or collection_ids == []: return ITEM_INDICES else: return ",".join([index_by_collection_id(c) for c in collection_ids])