From a8eb93cfaa83dfefa7409ec2f479b35c2f92419f Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 20 Nov 2024 09:17:01 -0900 Subject: [PATCH 01/45] add units --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index e6887189..5e69f618 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -75,7 +75,7 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: items = [item for page in results.pages() for item in page] if (n_items := len(items)) != 1: raise ValueError( - f'{n_items} for {scene} found in Sentinel-2 STAC collection: ' + f'{n_items} items for {scene} found in Sentinel-2 STAC collection: ' f'{SENTINEL2_CATALOG_API}/collections/{SENTINEL2_COLLECTION_NAME}' ) item = items[0] From 51038d104859699577f85091ff4553d442f37b50 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 21 Nov 2024 11:30:04 -0900 Subject: [PATCH 02/45] allow lambda to query hyp3 jobs table --- .github/actions/deploy/action.yml | 3 +++ .github/workflows/deploy-prod.yml | 1 + .github/workflows/deploy-test.yml | 1 + cloudformation.yml | 6 ++++++ 4 files changed, 11 insertions(+) diff --git a/.github/actions/deploy/action.yml b/.github/actions/deploy/action.yml index 18956e89..cfa0e32c 100644 --- a/.github/actions/deploy/action.yml +++ b/.github/actions/deploy/action.yml @@ -16,6 +16,8 @@ inputs: required: true HYP3_API: required: true + HYP3_JOBS_TABLE: + required: true LAMBDA_LOGGING_LEVEL: required: true EARTHDATA_USERNAME: @@ -60,6 +62,7 @@ runs: LandsatTopicArn=${{ inputs.LANDSAT_TOPIC_ARN }} \ Sentinel2TopicArn=${{ inputs.SENTINEL2_TOPIC_ARN }} \ Hyp3Api=${{ inputs.HYP3_API }} \ + Hyp3JobsTable=${{ inputs.HYP3_JOBS_TABLE }} \ LambdaLoggingLevel=${{ inputs.LAMBDA_LOGGING_LEVEL }} \ EarthdataUsername=${{ inputs.EARTHDATA_USERNAME }} \ EarthdataPassword=${{ inputs.EARTHDATA_PASSWORD }} \ diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 5ba58c16..fa5adc86 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -25,6 +25,7 @@ jobs: LANDSAT_TOPIC_ARN: arn:aws:sns:us-west-2:673253540267:public-c2-notify-v2 SENTINEL2_TOPIC_ARN: arn:aws:sns:eu-west-1:214830741341:NewSentinel2Product HYP3_API: https://hyp3-its-live.asf.alaska.edu + HYP3_JOBS_TABLE: hyp3-its-live-JobsTable-178MIZ4ZXKL69 LAMBDA_LOGGING_LEVEL: INFO PUBLISH_BUCKET: its-live-data MATTERMOST_PAT: ${{ secrets.MATTERMOST_PAT }} diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 65bd3e6e..20eec4dc 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -25,6 +25,7 @@ jobs: LANDSAT_TOPIC_ARN: arn:aws:sns:us-west-2:986442313181:its-live-notify-landsat-test SENTINEL2_TOPIC_ARN: arn:aws:sns:eu-west-1:986442313181:its-live-notify-sentinel2-test HYP3_API: https://hyp3-its-live.asf.alaska.edu + HYP3_JOBS_TABLE: hyp3-its-live-test-JobsTable-1FT990PLG5K45 LAMBDA_LOGGING_LEVEL: INFO PUBLISH_BUCKET: its-live-data-test MATTERMOST_PAT: ${{ secrets.MATTERMOST_PAT }} diff --git a/cloudformation.yml b/cloudformation.yml index 6fb6256c..9088b671 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -8,6 +8,9 @@ Parameters: Hyp3Api: Type: String + Hyp3JobsTable: + Type: String + LambdaLoggingLevel: Type: String Default: INFO @@ -143,6 +146,9 @@ Resources: - Effect: Allow Action: sqs:* Resource: !GetAtt Queue.Arn + - Effect: Allow + Action: 
dynamodb:Query + Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${Hyp3JobsTable}" StatusMessages: Type: AWS::CloudFormation::Stack From 90d8e8c8412cde4543ef115de8427988bcd4c401 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 21 Nov 2024 15:46:28 -0500 Subject: [PATCH 03/45] query dynamo for jobs by status type --- its_live_monitoring/src/main.py | 66 +++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index aedeb600..1ddd938c 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -1,10 +1,13 @@ """Lambda function to trigger low-latency Landsat and Sentinel-2 processing from newly acquired scenes.""" import argparse +import datetime import json import logging import os import sys +from datetime import timezone +from dateutil.parser import parse from typing import Iterable import boto3 @@ -13,6 +16,8 @@ import hyp3_sdk as sdk import numpy as np import pandas as pd +from boto3.dynamodb.conditions import Attr, Key +# from hyp3_sdk.jobs import Job, Batch from landsat import ( get_landsat_pairs_for_reference_scene, @@ -43,6 +48,7 @@ 's3', config=botocore.config.Config(signature_version=botocore.UNSIGNED), ) +dynamo = boto3.resource('dynamodb') def point_to_region(lat: float, lon: float) -> str: @@ -119,6 +125,37 @@ def deduplicate_s3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return pairs.drop(index=drop_indexes) +def format_time(time: datetime) -> str: + if time.tzinfo is None: + raise ValueError(f'missing tzinfo for datetime {time}') + utc_time = time.astimezone(timezone.utc) + return utc_time.isoformat(timespec='seconds') + + +def query_jobs_by_status_code(status_code, user, name, start): + table = dynamo.Table(os.environ['JOBS_TABLE_NAME']) + + formatted_start = format_time(parse(start)) + + key_expression = Key('status_code').eq(status_code) + key_expression &= Key('user_id').eq(user) + key_expression &= Key('request_time').geq(formatted_start) + + filter_expression = Attr('job_id').exists() + filter_expression &= Attr('name').eq(name) + + params = { + 'IndexName': 'status_code', + 'KeyConditionExpression': key_expression, + 'FilterExpression': filter_expression, + 'ScanIndexForward': False, + } + + response = table.query(**params) + jobs = response['Items'] + return sdk.Batch([sdk.Job.from_dict(job) for job in jobs]) + + def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Search HyP3 jobs since the reference scene's acquisition date and remove already submitted (in PENDING or RUNNING state) pairs. @@ -129,25 +166,24 @@ def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: Returns: The pairs GeoDataFrame with any already submitted pairs removed. 
""" - pending_jobs = HYP3.find_jobs( - job_type='AUTORIFT', - start=pairs.iloc[0].reference_acquisition, - name=pairs.iloc[0].reference, - user_id=EARTHDATA_USERNAME, - status_code='PENDING', - ) - running_jobs = HYP3.find_jobs( - job_type='AUTORIFT', - start=pairs.iloc[0].reference_acquisition, - name=pairs.iloc[0].reference, - user_id=EARTHDATA_USERNAME, - status_code='RUNNING', - ) + its_live_user = 'hyp3.its_live' + pending_jobs = query_jobs_by_status_code( + 'PENDING', + its_live_user, + pairs.iloc[0].reference, + pairs.iloc[0].reference_acquisition + ) + running_jobs = query_jobs_by_status_code( + 'RUNNING', + its_live_user, + pairs.iloc[0].reference, + pairs.iloc[0].reference_acquisition + ) jobs = pending_jobs + running_jobs - df = pd.DataFrame([job.job_parameters['granules'] for job in jobs], columns=['reference', 'secondary']) + df = pd.DataFrame([job['job_parameters']['granules'] for job in jobs], columns=['reference', 'secondary']) df = df.set_index(['reference', 'secondary']) pairs = pairs.set_index(['reference', 'secondary']) From 1ae79194c8e50f39e4e99071ecb1160964f2bfb8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 21 Nov 2024 15:57:37 -0500 Subject: [PATCH 04/45] ruff --- its_live_monitoring/src/main.py | 40 +++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 1ddd938c..17f92e87 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -7,7 +7,6 @@ import os import sys from datetime import timezone -from dateutil.parser import parse from typing import Iterable import boto3 @@ -17,8 +16,9 @@ import numpy as np import pandas as pd from boto3.dynamodb.conditions import Attr, Key -# from hyp3_sdk.jobs import Job, Batch +from dateutil.parser import parse +# from hyp3_sdk.jobs import Job, Batch from landsat import ( get_landsat_pairs_for_reference_scene, get_landsat_stac_item, @@ -126,13 +126,32 @@ def deduplicate_s3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def format_time(time: datetime) -> str: + """Format time to ISO with UTC timezone. + + Args: + time: a datetime object to format + + Returns: + datetime: the UTC time in ISO format + """ if time.tzinfo is None: raise ValueError(f'missing tzinfo for datetime {time}') utc_time = time.astimezone(timezone.utc) return utc_time.isoformat(timespec='seconds') -def query_jobs_by_status_code(status_code, user, name, start): +def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime.datetime) -> sdk.Batch: + """Query dynamodb for jobs by status_code, then filter by user, name, and date. + + Args: + status_code: `status_code` of the desired jobs + user: the `user_id` that submitted the jobs + name: the name of the jobs + start: the earliest submission date of the jobs + + Returns: + sdk.Batch: batch of jobs matching the filters + """ table = dynamo.Table(os.environ['JOBS_TABLE_NAME']) formatted_start = format_time(parse(start)) @@ -153,7 +172,7 @@ def query_jobs_by_status_code(status_code, user, name, start): response = table.query(**params) jobs = response['Items'] - return sdk.Batch([sdk.Job.from_dict(job) for job in jobs]) + return sdk.Batch([sdk.Job.from_dict(job) for job in jobs]) def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @@ -166,24 +185,17 @@ def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: Returns: The pairs GeoDataFrame with any already submitted pairs removed. 
""" - its_live_user = 'hyp3.its_live' pending_jobs = query_jobs_by_status_code( - 'PENDING', - its_live_user, - pairs.iloc[0].reference, - pairs.iloc[0].reference_acquisition + 'PENDING', its_live_user, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition ) running_jobs = query_jobs_by_status_code( - 'RUNNING', - its_live_user, - pairs.iloc[0].reference, - pairs.iloc[0].reference_acquisition + 'RUNNING', its_live_user, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition ) jobs = pending_jobs + running_jobs - df = pd.DataFrame([job['job_parameters']['granules'] for job in jobs], columns=['reference', 'secondary']) + df = pd.DataFrame([job.job_parameters['granules'] for job in jobs], columns=['reference', 'secondary']) df = df.set_index(['reference', 'secondary']) pairs = pairs.set_index(['reference', 'secondary']) From ae55f76d63d521eb0d2c58fc28e5327e75c3ed65 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 21 Nov 2024 16:01:00 -0500 Subject: [PATCH 05/45] updated changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index da49d308..181366fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.9] +### Changed +- Improved deduplication performance by searching HyP3's dynamodb directly for `PENDING` and `RUNNING` jobs, instead of using the `hyp3_sdk`. + ## [0.5.8] ### Changed - As an incremental improvement to deduplication performance, its-live-monitoring now: From 9354079537a14fc09ed0c1bf2def5116e79c5b09 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 21 Nov 2024 16:01:55 -0500 Subject: [PATCH 06/45] removed unnecessary comment --- its_live_monitoring/src/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 17f92e87..e2c2225f 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -18,7 +18,6 @@ from boto3.dynamodb.conditions import Attr, Key from dateutil.parser import parse -# from hyp3_sdk.jobs import Job, Batch from landsat import ( get_landsat_pairs_for_reference_scene, get_landsat_stac_item, From 03fe49d6e6eb7af7692ca0c33c972411797365b8 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 21 Nov 2024 13:07:20 -0900 Subject: [PATCH 07/45] pass jobs table name to lambda --- cloudformation.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/cloudformation.yml b/cloudformation.yml index 9088b671..c0967bb2 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -93,6 +93,7 @@ Resources: Environment: Variables: HYP3_API: !Ref Hyp3Api + JOBS_TABLE_NAME: !Ref Hyp3JobsTable LOGGING_LEVEL: !Ref LambdaLoggingLevel EARTHDATA_USERNAME: !Ref EarthdataUsername EARTHDATA_PASSWORD: !Ref EarthdataPassword From 5d75621e2cc50cdc3d5f2f7996e4e2101d51913a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 21 Nov 2024 14:49:39 -0900 Subject: [PATCH 08/45] finish implementing query_jobs_by_status_code --- its_live_monitoring/src/main.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index e2c2225f..c69aa5d5 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -16,7 +16,6 @@ import numpy as np import pandas as pd 
from boto3.dynamodb.conditions import Attr, Key -from dateutil.parser import parse from landsat import ( get_landsat_pairs_for_reference_scene, @@ -139,6 +138,9 @@ def format_time(time: datetime) -> str: return utc_time.isoformat(timespec='seconds') +# TODO: +# - add an `end` parameter? +# - unit tests def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime.datetime) -> sdk.Batch: """Query dynamodb for jobs by status_code, then filter by user, name, and date. @@ -153,14 +155,13 @@ def query_jobs_by_status_code(status_code: str, user: str, name: str, start: dat """ table = dynamo.Table(os.environ['JOBS_TABLE_NAME']) - formatted_start = format_time(parse(start)) - key_expression = Key('status_code').eq(status_code) - key_expression &= Key('user_id').eq(user) - key_expression &= Key('request_time').geq(formatted_start) - filter_expression = Attr('job_id').exists() - filter_expression &= Attr('name').eq(name) + filter_expression = ( + Attr('user_id').eq(user) + & Attr('name').eq(name) + & Attr('request_time').gte(format_time(start)) + ) params = { 'IndexName': 'status_code', @@ -169,8 +170,14 @@ def query_jobs_by_status_code(status_code: str, user: str, name: str, start: dat 'ScanIndexForward': False, } - response = table.query(**params) - jobs = response['Items'] + jobs = [] + while True: + response = table.query(**params) + jobs.extend(response['Items']) + if (next_key := response.get('LastEvaluatedKey')) is None: + break + params['ExclusiveStartKey'] = next_key + return sdk.Batch([sdk.Job.from_dict(job) for job in jobs]) From 80a280236b3c106c37eabdc6038622e686638169 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 21 Nov 2024 18:21:45 -0900 Subject: [PATCH 09/45] handle missing microseconds gracefully --- CHANGELOG.md | 4 ++++ its_live_monitoring/src/landsat.py | 2 +- its_live_monitoring/src/sentinel2.py | 2 +- tests/conftest.py | 8 ++++---- tests/its_live_monitoring/test_landsat.py | 8 ++++++-- tests/its_live_monitoring/test_sentinel2.py | 8 ++++---- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da49d308..897185b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.9] +### Fixed +- Mixed STAC Item datetime formats (e.g., occasionally not including microseconds) in the list of secondary scenes no longer causes a ValueError to be raised. 
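A minimal sketch of the failure mode described in that entry, assuming pandas 2.x and illustrative timestamp values (not taken from real scenes): with default parsing, `pd.to_datetime` infers a format from the first string and raises a `ValueError` when a later string omits microseconds, while `format='ISO8601'`, the option added to `landsat.py` and `sentinel2.py` in the diffs below, accepts the mix.

```python
import pandas as pd

# Illustrative STAC-style datetimes: the first has microseconds, the second does not.
timestamps = ['2024-01-20T04:30:03.658618Z', '2024-01-12T04:29:55Z']

try:
    # Default parsing infers a single format from the first value,
    # so the second value (no fractional seconds) raises a ValueError.
    pd.to_datetime(timestamps)
except ValueError as error:
    print(f'default parsing failed: {error}')

# Treating the strings as generic ISO 8601 accepts both variants.
print(pd.to_datetime(timestamps, format='ISO8601'))
```

`format='mixed'` would also parse these values, but `ISO8601` is stricter about what it accepts, which suits STAC item datetimes.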
+ ## [0.5.8] ### Changed - As an incremental improvement to deduplication performance, its-live-monitoring now: diff --git a/its_live_monitoring/src/landsat.py b/its_live_monitoring/src/landsat.py index 07a5e37f..b2a6ff37 100644 --- a/its_live_monitoring/src/landsat.py +++ b/its_live_monitoring/src/landsat.py @@ -123,6 +123,6 @@ def get_landsat_pairs_for_reference_scene( features.append(feature) df = gpd.GeoDataFrame.from_features(features) - df['datetime'] = pd.to_datetime(df.datetime) + df['datetime'] = pd.to_datetime(df.datetime, format='ISO8601') return df diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 5e69f618..79fda857 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -234,6 +234,6 @@ def get_sentinel2_pairs_for_reference_scene( features.append(feature) df = gpd.GeoDataFrame.from_features(features) - df['datetime'] = pd.to_datetime(df.datetime) + df['datetime'] = pd.to_datetime(df.datetime, format='ISO8601') return df diff --git a/tests/conftest.py b/tests/conftest.py index e52b911f..3cc3e0e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,19 +16,19 @@ def create_pystac_item( collection: str, geometry: dict | None = None, bbox: list | None = None, - assets: dict = None, + assets: dict | None = None, ) -> pystac.item.Item: if isinstance(datetime, str): datetime = date_parser(datetime) expected_item = pystac.item.Item( id=id, - geometry=geometry, + geometry=geometry if geometry is None else geometry.copy(), bbox=bbox, datetime=datetime, - properties=properties, + properties=properties.copy(), collection=collection, - assets=assets, + assets= assets if assets is None else assets.copy() ) return expected_item diff --git a/tests/its_live_monitoring/test_landsat.py b/tests/its_live_monitoring/test_landsat.py index 3c77aa1d..e769cbf6 100644 --- a/tests/its_live_monitoring/test_landsat.py +++ b/tests/its_live_monitoring/test_landsat.py @@ -1,5 +1,5 @@ from copy import deepcopy -from datetime import datetime +from datetime import datetime, timezone from unittest.mock import patch import landsat @@ -122,7 +122,11 @@ def test_get_landsat_pairs_for_reference_scene(mock_landsat_get_item, pystac_ite 'LC08_L1TP_138041_20240112_20240123_02_T1', 'LC09_L1TP_138041_20240104_20240104_02_T1', ] - sec_date_times = ['2024-01-20T04:30:03.658618Z', '2024-01-12T04:29:55.948514Z', '2024-01-04T04:30:03.184014Z'] + sec_date_times = [ + datetime(2024, 1, 20, 4, 30, 3, 658618, tzinfo=timezone.utc), + datetime(2024, 1, 12, 4, 29, 55, tzinfo=timezone.utc), + datetime(2024, 1, 4, 4, 30, 3, 184014, tzinfo=timezone.utc), + ] sec_items = [] for scene, date_time in zip(sec_scenes, sec_date_times): sec_items.append( diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 91e1b146..3afd67a5 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -1,5 +1,5 @@ from copy import deepcopy -from datetime import datetime +from datetime import datetime, timezone from unittest.mock import patch import pystac @@ -192,9 +192,9 @@ def test_get_sentinel2_pairs_for_reference_scene( 'S2B_22TCR_20210528_0_L1C', ] sec_date_times = [ - '2024-05-28T00:00:00.000Z', - '2023-05-28T00:00:00.000Z', - '2021-05-28T00:00:00.000Z', + datetime(2024, 5, 28, 0, 0, 0, 0, tzinfo=timezone.utc), + datetime(2023, 5, 28, 0, 0, 0, tzinfo=timezone.utc), + datetime(2021, 5, 28, 0, 0, 0, 0, tzinfo=timezone.utc), ] sec_items = [] for scene, date_time in 
zip(sec_scenes, sec_date_times): From ec45408143bb50bd982cf7b3a11d039e0f3f4682 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 21 Nov 2024 19:24:02 -0900 Subject: [PATCH 10/45] whitespace --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3cc3e0e0..7a588ecc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def create_pystac_item( datetime=datetime, properties=properties.copy(), collection=collection, - assets= assets if assets is None else assets.copy() + assets=assets if assets is None else assets.copy(), ) return expected_item From 20eb58fbdd1a3a75bdf1ac673397a602fecb2cbc Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 09:56:05 -0900 Subject: [PATCH 11/45] Update its_live_monitoring/src/main.py --- its_live_monitoring/src/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index c69aa5d5..7d9a1e76 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -139,7 +139,6 @@ def format_time(time: datetime) -> str: # TODO: -# - add an `end` parameter? # - unit tests def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime.datetime) -> sdk.Batch: """Query dynamodb for jobs by status_code, then filter by user, name, and date. From db6d847c52fd6a216d3286ceedae62528fd0e199 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 14:15:28 -0500 Subject: [PATCH 12/45] add conftest --- Makefile | 1 + requirements-its_live_monitoring.txt | 1 + tests/cfg.env | 1 + tests/conftest.py | 45 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 tests/cfg.env diff --git a/Makefile b/Makefile index f94ba947..93e5d151 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ install-lambda-deps: test_file ?= 'tests/' test: + export $$(xargs < tests/cfg.env); \ pytest $(test_file) landsat-integration: diff --git a/requirements-its_live_monitoring.txt b/requirements-its_live_monitoring.txt index 415fae97..1df2b1c0 100644 --- a/requirements-its_live_monitoring.txt +++ b/requirements-its_live_monitoring.txt @@ -5,3 +5,4 @@ pystac-client==0.8.5 requests==2.32.3 shapely==2.0.6 numpy==2.1.3 +moto==5.0.21 diff --git a/tests/cfg.env b/tests/cfg.env new file mode 100644 index 00000000..68736b1d --- /dev/null +++ b/tests/cfg.env @@ -0,0 +1 @@ +JOBS_TABLE_NAME=job-db-table diff --git a/tests/conftest.py b/tests/conftest.py index e52b911f..83f7c6e4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,13 @@ import datetime as dt +from os import environ from unittest.mock import NonCallableMock +import boto3 import hyp3_sdk as sdk import pystac import pytest from dateutil.parser import parse as date_parser +from moto import mock_aws @pytest.fixture @@ -62,3 +65,45 @@ def create_hyp3_batch(granules_list: list) -> sdk.Batch: return sdk.Batch([hyp3_job_factory(granules) for granules in granules_list]) return create_hyp3_batch + + +@mock_aws +@pytest.fixture +def tables(): + table_properties = { + 'BillingMode': 'PAY_PER_REQUEST', + 'AttributeDefinitions': [ + {'AttributeName': 'job_id', 'AttributeType': 'S'}, + {'AttributeName': 'user_id', 'AttributeType': 'S'}, + {'AttributeName': 'status_code', 'AttributeType': 'S'}, + {'AttributeName': 'request_time', 'AttributeType': 'S'}, + ], + 'KeySchema': [{'AttributeName': 'job_id', 'KeyType': 'HASH'}], + 'GlobalSecondaryIndexes': [ + { + 'IndexName': 'user_id', + 'KeySchema': 
[ + {'AttributeName': 'user_id', 'KeyType': 'HASH'}, + {'AttributeName': 'request_time', 'KeyType': 'RANGE'}, + ], + 'Projection': {'ProjectionType': 'ALL'}, + }, + { + 'IndexName': 'status_code', + 'KeySchema': [{'AttributeName': 'status_code', 'KeyType': 'HASH'}], + 'Projection': {'ProjectionType': 'ALL'}, + }, + ], + } + + with mock_aws(): + dynamo = boto3.resource('dynamodb') + + class Tables: + jobs_table = dynamo.create_table( + TableName=environ['JOBS_TABLE_NAME'], + **table_properties, + ) + + tables = Tables() + yield tables From 9d3a512e6f241a9bd29e03d2a4bd008a1e032c01 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 14:39:08 -0500 Subject: [PATCH 13/45] added `test_query_jobs_by_status_code` --- tests/its_live_monitoring/test_main.py | 77 ++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 7cbc07b2..e2f63ba6 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -1,3 +1,4 @@ +import datetime from unittest.mock import patch import geopandas as gpd @@ -136,3 +137,79 @@ def test_submit_pairs_for_processing(mock_submit_prepared_jobs, hyp3_batch_facto mock_submit_prepared_jobs.side_effect = [landsat_jobs] jobs = main.submit_pairs_for_processing(landsat_pairs) assert jobs == landsat_jobs + + +def test_query_jobs_by_status_code(tables): + its_live_user = 'hyp3.its_live' + + table_items = [ + { + 'job_id': 'job1', + 'user_id': its_live_user, + 'status_code': 'PENDING', + 'request_time': '2024-01-29T00:00:00.000000Z', + 'job_type': 'AUTORIFT', + 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'job_parameters': { + 'reference': 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'reference_acquisition': '2024-01-28T04:29:49.361022Z', + 'secondary': 'LC08_L1TP_138041_20240112_20240123_02_T1', + }, + }, + { + 'job_id': 'job2', + 'user_id': 'other-user', + 'status_code': 'RUNNING', + 'request_time': '2024-01-29T00:00:00+00:00', + 'job_type': 'AUTORIFT', + 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'job_parameters': { + 'reference': 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'reference_acquisition': '2024-01-28T04:29:49.361022Z', + 'secondary': 'LC08_L1TP_138041_20240112_20240123_02_T1', + }, + }, + ] + + for item in table_items: + tables.jobs_table.put_item(Item=item) + + jobs = main.query_jobs_by_status_code( + 'PENDING', + its_live_user, + 'LC09_L1TP_138041_20240120_20240120_02_T1', + datetime.datetime.fromisoformat('2024-01-28T00:00:00.000000Z'), + ) + assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[0])]) + + jobs = main.query_jobs_by_status_code( + 'RUNNING', + 'other-user', + 'LC09_L1TP_138041_20240120_20240120_02_T1', + datetime.datetime.fromisoformat('2024-01-01T00:00:00+00:00'), + ) + assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[1])]) + + jobs = main.query_jobs_by_status_code( + 'PENDING', + its_live_user, + 'LC09_L1TP_138041_20240120_20240120_02_T1', + datetime.datetime.fromisoformat('2024-01-30T00:00:00.000000Z'), + ) + assert jobs == sdk.Batch([]) + + jobs = main.query_jobs_by_status_code( + 'RUNNING', + its_live_user, + 'LC09_L1TP_138041_20240120_20240120_02_T1', + datetime.datetime.fromisoformat('2024-01-28T00:00:00.000000Z'), + ) + assert jobs == sdk.Batch([]) + + jobs = main.query_jobs_by_status_code( + 'SUCCEEDED', + 'non-existant-user', + 'non-existant-granule', + datetime.datetime.fromisoformat('2000-01-01T00:00:00.000000Z'), + ) + assert jobs == sdk.Batch([]) From 
d9b14970b844fbd3b714ed07e27ce3be1cae8fc9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 14:40:54 -0500 Subject: [PATCH 14/45] removed unnecessary params --- tests/its_live_monitoring/test_main.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index e2f63ba6..7522449a 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -150,11 +150,6 @@ def test_query_jobs_by_status_code(tables): 'request_time': '2024-01-29T00:00:00.000000Z', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', - 'job_parameters': { - 'reference': 'LC09_L1TP_138041_20240120_20240120_02_T1', - 'reference_acquisition': '2024-01-28T04:29:49.361022Z', - 'secondary': 'LC08_L1TP_138041_20240112_20240123_02_T1', - }, }, { 'job_id': 'job2', @@ -163,11 +158,6 @@ def test_query_jobs_by_status_code(tables): 'request_time': '2024-01-29T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', - 'job_parameters': { - 'reference': 'LC09_L1TP_138041_20240120_20240120_02_T1', - 'reference_acquisition': '2024-01-28T04:29:49.361022Z', - 'secondary': 'LC08_L1TP_138041_20240112_20240123_02_T1', - }, }, ] From 8b4d31c05bb59be58a517615c599a6816de510e6 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 10:45:12 -0900 Subject: [PATCH 15/45] add moto to requirements --- requirements-all.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-all.txt b/requirements-all.txt index f15dc5da..762c0021 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -4,3 +4,4 @@ cfn-lint==1.19.0 ruff==0.7.4 pytest==8.3.3 responses==0.25.3 +moto==5.0.21 From 3792a528ba420ff250a8e2a098edf1f52577b71e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 10:46:09 -0900 Subject: [PATCH 16/45] remove duplicate moto --- requirements-all.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 762c0021..f15dc5da 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -4,4 +4,3 @@ cfn-lint==1.19.0 ruff==0.7.4 pytest==8.3.3 responses==0.25.3 -moto==5.0.21 From 054168b8ae37c42b15f04a7be6ddebb950939756 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 14:46:27 -0500 Subject: [PATCH 17/45] added extra jobs to ensure filtering is working --- tests/its_live_monitoring/test_main.py | 60 ++++++++++++++++---------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 7522449a..a1ee627f 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -62,31 +62,31 @@ def test_get_key(mock_list_objects_v2): assert main.get_key(['N00E000', 'N00E010'], 'latest', 'earliest') == 'N00E010/earliest_X_latest_G0120V02_P000.nc' -@patch('main.HYP3.find_jobs') -def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): - sec_scenes = [ - 'LC09_L1TP_138041_20240120_20240120_02_T1', - 'LC08_L1TP_138041_20240112_20240123_02_T1', - 'LC09_L1TP_138041_20240104_20240104_02_T1', - ] - ref_scenes = ['LC08_L1TP_138041_20240128_20240207_02_T1'] * 3 - ref_acquisitions = ['2024-01-28T04:29:49.361022Z'] * 3 +# @patch('main.HYP3.find_jobs') +# def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): +# sec_scenes = [ +# 'LC09_L1TP_138041_20240120_20240120_02_T1', +# 'LC08_L1TP_138041_20240112_20240123_02_T1', +# 
'LC09_L1TP_138041_20240104_20240104_02_T1', +# ] +# ref_scenes = ['LC08_L1TP_138041_20240128_20240207_02_T1'] * 3 +# ref_acquisitions = ['2024-01-28T04:29:49.361022Z'] * 3 - landsat_pairs = gpd.GeoDataFrame( - {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} - ) +# landsat_pairs = gpd.GeoDataFrame( +# {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} +# ) - mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - assert pairs.equals(landsat_pairs) +# mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] +# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) +# assert pairs.equals(landsat_pairs) - mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - assert len(pairs) == 0 +# mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] +# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) +# assert len(pairs) == 0 - mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - assert len(pairs) == 1 +# mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] +# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) +# assert len(pairs) == 1 @patch('main.get_key') @@ -153,6 +153,22 @@ def test_query_jobs_by_status_code(tables): }, { 'job_id': 'job2', + 'user_id': its_live_user, + 'status_code': 'PENDING', + 'request_time': '2024-01-01T00:00:00.000000Z', + 'job_type': 'AUTORIFT', + 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', + }, + { + 'job_id': 'job3', + 'user_id': 'other-user', + 'status_code': 'PENDING', + 'request_time': '2024-01-29T00:00:00.000000Z', + 'job_type': 'AUTORIFT', + 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', + }, + { + 'job_id': 'job4', 'user_id': 'other-user', 'status_code': 'RUNNING', 'request_time': '2024-01-29T00:00:00+00:00', @@ -178,7 +194,7 @@ def test_query_jobs_by_status_code(tables): 'LC09_L1TP_138041_20240120_20240120_02_T1', datetime.datetime.fromisoformat('2024-01-01T00:00:00+00:00'), ) - assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[1])]) + assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[3])]) jobs = main.query_jobs_by_status_code( 'PENDING', From e6342bc9c24b0003cba67f867b36a2d92a6291f0 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 14:46:52 -0500 Subject: [PATCH 18/45] uncommented --- tests/its_live_monitoring/test_main.py | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index a1ee627f..94359eaa 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -62,31 +62,31 @@ def test_get_key(mock_list_objects_v2): assert main.get_key(['N00E000', 'N00E010'], 'latest', 'earliest') == 'N00E010/earliest_X_latest_G0120V02_P000.nc' -# @patch('main.HYP3.find_jobs') -# def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): -# sec_scenes = [ -# 'LC09_L1TP_138041_20240120_20240120_02_T1', -# 'LC08_L1TP_138041_20240112_20240123_02_T1', -# 'LC09_L1TP_138041_20240104_20240104_02_T1', -# ] -# ref_scenes = ['LC08_L1TP_138041_20240128_20240207_02_T1'] * 3 -# ref_acquisitions = ['2024-01-28T04:29:49.361022Z'] * 3 - -# landsat_pairs = gpd.GeoDataFrame( 
-# {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} -# ) - -# mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] -# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) -# assert pairs.equals(landsat_pairs) - -# mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] -# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) -# assert len(pairs) == 0 - -# mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] -# pairs = main.deduplicate_hyp3_pairs(landsat_pairs) -# assert len(pairs) == 1 +@patch('main.HYP3.find_jobs') +def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): + sec_scenes = [ + 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'LC08_L1TP_138041_20240112_20240123_02_T1', + 'LC09_L1TP_138041_20240104_20240104_02_T1', + ] + ref_scenes = ['LC08_L1TP_138041_20240128_20240207_02_T1'] * 3 + ref_acquisitions = ['2024-01-28T04:29:49.361022Z'] * 3 + + landsat_pairs = gpd.GeoDataFrame( + {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} + ) + + mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) + assert pairs.equals(landsat_pairs) + + mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) + assert len(pairs) == 0 + + mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) + assert len(pairs) == 1 @patch('main.get_key') From 600e6cf8ee502e09edef11a98ce0250d23dbd33d Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 10:48:58 -0900 Subject: [PATCH 19/45] update test_deduplicate_hyp3_pairs --- tests/its_live_monitoring/test_main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 7522449a..0cedc2cc 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -62,8 +62,8 @@ def test_get_key(mock_list_objects_v2): assert main.get_key(['N00E000', 'N00E010'], 'latest', 'earliest') == 'N00E010/earliest_X_latest_G0120V02_P000.nc' -@patch('main.HYP3.find_jobs') -def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): +@patch('main.query_jobs_by_status_code') +def test_deduplicate_hyp3_pairs(mock_query_jobs_by_status_code, hyp3_batch_factory): sec_scenes = [ 'LC09_L1TP_138041_20240120_20240120_02_T1', 'LC08_L1TP_138041_20240112_20240123_02_T1', @@ -76,15 +76,15 @@ def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} ) - mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] + mock_query_jobs_by_status_code.side_effect = [sdk.Batch(), sdk.Batch()] pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert pairs.equals(landsat_pairs) - mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] + mock_query_jobs_by_status_code.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 0 - mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] + mock_query_jobs_by_status_code.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], 
sec_scenes[:-1])), sdk.Batch()] pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 1 From 13051a69dc68031e8569cb7c7d6eeb45c2e31253 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 10:51:11 -0900 Subject: [PATCH 20/45] remove todo --- its_live_monitoring/src/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 7d9a1e76..3e94588a 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -138,8 +138,6 @@ def format_time(time: datetime) -> str: return utc_time.isoformat(timespec='seconds') -# TODO: -# - unit tests def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime.datetime) -> sdk.Batch: """Query dynamodb for jobs by status_code, then filter by user, name, and date. From e8341750402ecfa9cc7d473b5c2b19631c150fa7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:20:19 -0500 Subject: [PATCH 21/45] consistant times and extra name case --- tests/its_live_monitoring/test_main.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 1b1bed0c..6886c1be 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -151,6 +151,14 @@ def test_query_jobs_by_status_code(tables): 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', }, + { + 'job_id': 'job1', + 'user_id': its_live_user, + 'status_code': 'PENDING', + 'request_time': '2024-01-29T00:00:00.000000Z', + 'job_type': 'AUTORIFT', + 'name': 'LC09_L1TP_138041_20240120_20240120_02_T2', + }, { 'job_id': 'job2', 'user_id': its_live_user, @@ -184,7 +192,7 @@ def test_query_jobs_by_status_code(tables): 'PENDING', its_live_user, 'LC09_L1TP_138041_20240120_20240120_02_T1', - datetime.datetime.fromisoformat('2024-01-28T00:00:00.000000Z'), + datetime.datetime.fromisoformat('2024-01-28T00:00:00+00:00'), ) assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[0])]) @@ -200,7 +208,7 @@ def test_query_jobs_by_status_code(tables): 'PENDING', its_live_user, 'LC09_L1TP_138041_20240120_20240120_02_T1', - datetime.datetime.fromisoformat('2024-01-30T00:00:00.000000Z'), + datetime.datetime.fromisoformat('2024-01-30T00:00:00+00:00'), ) assert jobs == sdk.Batch([]) @@ -208,7 +216,7 @@ def test_query_jobs_by_status_code(tables): 'RUNNING', its_live_user, 'LC09_L1TP_138041_20240120_20240120_02_T1', - datetime.datetime.fromisoformat('2024-01-28T00:00:00.000000Z'), + datetime.datetime.fromisoformat('2024-01-28T00:00:00+00:00'), ) assert jobs == sdk.Batch([]) @@ -216,6 +224,6 @@ def test_query_jobs_by_status_code(tables): 'SUCCEEDED', 'non-existant-user', 'non-existant-granule', - datetime.datetime.fromisoformat('2000-01-01T00:00:00.000000Z'), + datetime.datetime.fromisoformat('2000-01-01T00:00:00+00:00'), ) assert jobs == sdk.Batch([]) From a9d63779195f81eabfdd3172700ad5f77f5af65e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 11:22:16 -0900 Subject: [PATCH 22/45] fix ruff warning --- its_live_monitoring/src/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 3e94588a..dd890a56 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -7,7 +7,6 @@ import os import sys from datetime import timezone -from typing import Iterable import boto3 import botocore.config 
@@ -71,7 +70,7 @@ def regions_from_bounds(min_lon: float, min_lat: float, max_lon: float, max_lat: return {point_to_region(lat, lon) for lat, lon in zip(lats.ravel(), lons.ravel())} -def get_key(tile_prefixes: Iterable[str], reference: str, secondary: str) -> str | None: +def get_key(tile_prefixes: list[str], reference: str, secondary: str) -> str | None: """Search S3 for the key of a processed pair. Args: From c85f3f6d72887c61b5aa766f9c475576f111b354 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:22:56 -0500 Subject: [PATCH 23/45] fixes --- tests/its_live_monitoring/test_main.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 6886c1be..0101a257 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -147,36 +147,36 @@ def test_query_jobs_by_status_code(tables): 'job_id': 'job1', 'user_id': its_live_user, 'status_code': 'PENDING', - 'request_time': '2024-01-29T00:00:00.000000Z', + 'request_time': '2024-01-29T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', }, { - 'job_id': 'job1', + 'job_id': 'job2', 'user_id': its_live_user, 'status_code': 'PENDING', - 'request_time': '2024-01-29T00:00:00.000000Z', + 'request_time': '2024-01-29T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T2', }, { - 'job_id': 'job2', + 'job_id': 'job3', 'user_id': its_live_user, 'status_code': 'PENDING', - 'request_time': '2024-01-01T00:00:00.000000Z', + 'request_time': '2024-01-01T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', }, { - 'job_id': 'job3', + 'job_id': 'job4', 'user_id': 'other-user', 'status_code': 'PENDING', - 'request_time': '2024-01-29T00:00:00.000000Z', + 'request_time': '2024-01-29T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', }, { - 'job_id': 'job4', + 'job_id': 'job5', 'user_id': 'other-user', 'status_code': 'RUNNING', 'request_time': '2024-01-29T00:00:00+00:00', @@ -202,7 +202,7 @@ def test_query_jobs_by_status_code(tables): 'LC09_L1TP_138041_20240120_20240120_02_T1', datetime.datetime.fromisoformat('2024-01-01T00:00:00+00:00'), ) - assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[3])]) + assert jobs == sdk.Batch([sdk.Job.from_dict(table_items[4])]) jobs = main.query_jobs_by_status_code( 'PENDING', From 983e34eed7abed5e04465a536612138a759df303 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:23:11 -0500 Subject: [PATCH 24/45] ruff --- tests/its_live_monitoring/test_main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index 0101a257..aa8b1e3f 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -84,7 +84,10 @@ def test_deduplicate_hyp3_pairs(mock_query_jobs_by_status_code, hyp3_batch_facto pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 0 - mock_query_jobs_by_status_code.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] + mock_query_jobs_by_status_code.side_effect = [ + hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), + sdk.Batch(), + ] pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 1 From 0a21ae65f7788a52f286b07b61b6140646373c5f Mon Sep 17 00:00:00 2001 From: 
Andrew Player Date: Fri, 22 Nov 2024 15:24:36 -0500 Subject: [PATCH 25/45] ruff --- its_live_monitoring/src/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index dd890a56..72047fe6 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -153,11 +153,7 @@ def query_jobs_by_status_code(status_code: str, user: str, name: str, start: dat key_expression = Key('status_code').eq(status_code) - filter_expression = ( - Attr('user_id').eq(user) - & Attr('name').eq(name) - & Attr('request_time').gte(format_time(start)) - ) + filter_expression = Attr('user_id').eq(user) & Attr('name').eq(name) & Attr('request_time').gte(format_time(start)) params = { 'IndexName': 'status_code', From 19e13bd486a679f16e099992f3d6bc8888738094 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:27:21 -0500 Subject: [PATCH 26/45] add default aws region for tests --- tests/cfg.env | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cfg.env b/tests/cfg.env index 68736b1d..194c8286 100644 --- a/tests/cfg.env +++ b/tests/cfg.env @@ -1 +1,2 @@ JOBS_TABLE_NAME=job-db-table +AWS_DEFAULT_REGION=us-west-2 From feaa201dac656644dac6d896c7079cacf00c545f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:29:06 -0500 Subject: [PATCH 27/45] fake creds for testing --- tests/cfg.env | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/cfg.env b/tests/cfg.env index 194c8286..97ca923d 100644 --- a/tests/cfg.env +++ b/tests/cfg.env @@ -1,2 +1,6 @@ JOBS_TABLE_NAME=job-db-table AWS_DEFAULT_REGION=us-west-2 +AWS_ACCESS_KEY_ID=testing +AWS_SECRET_ACCESS_KEY=testing +AWS_SECURITY_TOKEN=testing +AWS_SESSION_TOKEN=testing From a8057fde756097215d1d06a6f420f3ee1639cd34 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 22 Nov 2024 15:38:32 -0500 Subject: [PATCH 28/45] equal acquisition time case --- tests/its_live_monitoring/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index aa8b1e3f..14405af2 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -150,7 +150,7 @@ def test_query_jobs_by_status_code(tables): 'job_id': 'job1', 'user_id': its_live_user, 'status_code': 'PENDING', - 'request_time': '2024-01-29T00:00:00+00:00', + 'request_time': '2024-01-28T00:00:00+00:00', 'job_type': 'AUTORIFT', 'name': 'LC09_L1TP_138041_20240120_20240120_02_T1', }, From 9d5fc6ceaeeca67ab2ae9c885c61ade38a2a8940 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 11:45:21 -0900 Subject: [PATCH 29/45] fix datetime type hints --- its_live_monitoring/src/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 72047fe6..7e19fee3 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -1,12 +1,11 @@ """Lambda function to trigger low-latency Landsat and Sentinel-2 processing from newly acquired scenes.""" import argparse -import datetime import json import logging import os import sys -from datetime import timezone +from datetime import datetime, timezone import boto3 import botocore.config @@ -137,7 +136,7 @@ def format_time(time: datetime) -> str: return utc_time.isoformat(timespec='seconds') -def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime.datetime) -> sdk.Batch: 
+def query_jobs_by_status_code(status_code: str, user: str, name: str, start: datetime) -> sdk.Batch: """Query dynamodb for jobs by status_code, then filter by user, name, and date. Args: From df48e7b73ea8b861b2c80a78b29a4160856aca84 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 14:12:10 -0900 Subject: [PATCH 30/45] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03b461c8..1093355d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [0.5.9] - ### Changed - Improved deduplication performance by searching HyP3's dynamodb directly for `PENDING` and `RUNNING` jobs, instead of using the `hyp3_sdk`. From 57aef156756fbfeeae02d7908737ed578df1399d Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 15:01:55 -0900 Subject: [PATCH 31/45] deepcopy to be extra careful instead of copy so we just don't have to think about this again --- tests/conftest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 64701edb..f80c6e8a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,5 @@ import datetime as dt -from os import environ +from copy import deepcopy from unittest.mock import NonCallableMock import boto3 @@ -26,12 +26,12 @@ def create_pystac_item( expected_item = pystac.item.Item( id=id, - geometry=geometry if geometry is None else geometry.copy(), + geometry=geometry if geometry is None else deepcopy(geometry), bbox=bbox, datetime=datetime, - properties=properties.copy(), + properties=deepcopy(properties), collection=collection, - assets=assets if assets is None else assets.copy(), + assets=assets if assets is None else deepcopy(assets), ) return expected_item From 2d42e54d5367f4283266a834ff6a6327fe4555e1 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 15:05:39 -0900 Subject: [PATCH 32/45] add back dropped import (how?) 
--- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index f80c6e8a..7e442d63 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ import datetime as dt from copy import deepcopy +from os import environ from unittest.mock import NonCallableMock import boto3 From 2fc8d4ad99c535422c22279d58fc4c674379d032 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 15:13:39 -0900 Subject: [PATCH 33/45] fix requirements --- requirements-all.txt | 1 + requirements-its_live_monitoring.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index f15dc5da..762c0021 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -4,3 +4,4 @@ cfn-lint==1.19.0 ruff==0.7.4 pytest==8.3.3 responses==0.25.3 +moto==5.0.21 diff --git a/requirements-its_live_monitoring.txt b/requirements-its_live_monitoring.txt index 1df2b1c0..90ddf847 100644 --- a/requirements-its_live_monitoring.txt +++ b/requirements-its_live_monitoring.txt @@ -5,4 +5,4 @@ pystac-client==0.8.5 requests==2.32.3 shapely==2.0.6 numpy==2.1.3 -moto==5.0.21 +boto3==1.35.68 From b84801d483015c7324a19166ae3a3e8b445b8e88 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 15:29:49 -0900 Subject: [PATCH 34/45] move setup python and aws credentials actions workflow steps out of custom action so dependabot can see them --- .github/actions/deploy/action.yml | 14 -------------- .github/workflows/deploy-prod.yml | 11 +++++++++++ .github/workflows/deploy-test.yml | 13 +++++++++++-- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/.github/actions/deploy/action.yml b/.github/actions/deploy/action.yml index cfa0e32c..652ea521 100644 --- a/.github/actions/deploy/action.yml +++ b/.github/actions/deploy/action.yml @@ -4,10 +4,6 @@ description: Deploy the ITS_LIVE Monitoring system inputs: STACK_NAME: required: true - AWS_ACCESS_KEY_ID: - required: true - AWS_SECRET_ACCESS_KEY: - required: true CF_TEMPLATE_BUCKET: required: true LANDSAT_TOPIC_ARN: @@ -32,16 +28,6 @@ inputs: runs: using: composite steps: - - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ inputs.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ inputs.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - uses: actions/setup-python@v3 - with: - python-version: 3.12 - - name: Install shell: bash run: make install-lambda-deps diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index fa5adc86..5c2adcc5 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -14,6 +14,17 @@ jobs: steps: - uses: actions/checkout@v4 + + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - uses: actions/setup-python@v3 + with: + python-version: 3.12 + - uses: ./.github/actions/deploy with: STACK_NAME: its-live-monitoring-prod diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 20eec4dc..2b50b0cb 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -14,11 +14,20 @@ jobs: steps: - uses: actions/checkout@v4 + + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - uses: 
actions/setup-python@v3 + with: + python-version: 3.12 + - uses: ./.github/actions/deploy with: STACK_NAME: its-live-monitoring-test - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} EARTHDATA_USERNAME: ${{ secrets.EARTHDATA_USERNAME }} EARTHDATA_PASSWORD: ${{ secrets.EARTHDATA_PASSWORD }} CF_TEMPLATE_BUCKET: cf-templates-3o5lnspmwmzg-us-west-2 From 1fcebefbce750ae8ea9c2a851183d7c935638b9e Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 15:31:27 -0900 Subject: [PATCH 35/45] remove unused inputs --- .github/workflows/deploy-prod.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 5c2adcc5..6ca292e6 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -28,8 +28,6 @@ jobs: - uses: ./.github/actions/deploy with: STACK_NAME: its-live-monitoring-prod - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} EARTHDATA_USERNAME: ${{ secrets.EARTHDATA_USERNAME }} EARTHDATA_PASSWORD: ${{ secrets.EARTHDATA_PASSWORD }} CF_TEMPLATE_BUCKET: cf-templates-3o5lnspmwmzg-us-west-2 From 97c82718fd0057271b4cd7cce058083b6cf74f91 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Fri, 22 Nov 2024 15:51:04 -0900 Subject: [PATCH 36/45] fix dynamodb:Query perm --- cloudformation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudformation.yml b/cloudformation.yml index c0967bb2..edb2e8f4 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -149,7 +149,7 @@ Resources: Resource: !GetAtt Queue.Arn - Effect: Allow Action: dynamodb:Query - Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${Hyp3JobsTable}" + Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${Hyp3JobsTable}*" StatusMessages: Type: AWS::CloudFormation::Stack From 8053973d5378fab62713f7bd8900a62bbbe5bc99 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 22:03:06 -0900 Subject: [PATCH 37/45] fix testing API, permissions, and bucket --- .github/workflows/deploy-test.yml | 2 +- README.md | 27 ++++++++++++++++++++------- cloudformation.yml | 7 +++++++ its_live_monitoring/src/main.py | 8 ++------ 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 2b50b0cb..4925a39b 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -33,7 +33,7 @@ jobs: CF_TEMPLATE_BUCKET: cf-templates-3o5lnspmwmzg-us-west-2 LANDSAT_TOPIC_ARN: arn:aws:sns:us-west-2:986442313181:its-live-notify-landsat-test SENTINEL2_TOPIC_ARN: arn:aws:sns:eu-west-1:986442313181:its-live-notify-sentinel2-test - HYP3_API: https://hyp3-its-live.asf.alaska.edu + HYP3_API: https://hyp3-its-live-test.asf.alaska.edu HYP3_JOBS_TABLE: hyp3-its-live-test-JobsTable-1FT990PLG5K45 LAMBDA_LOGGING_LEVEL: INFO PUBLISH_BUCKET: its-live-data-test diff --git a/README.md b/README.md index 225e6694..b012c9e4 100644 --- a/README.md +++ b/README.md @@ -66,23 +66,36 @@ aws sns publish \ --message file://${MESSAGE_FILE} ``` -where `TOPIC_ARN` is the ARN of the test topic and `MESSAGE_FILE` is the path to a file containing the contents of the message you want published. 
Example message contents are provided in these files in the [`tests/integration`](tests/integration) directory, two of which are described here: * [`landsat-l8-valid.json`](tests/integration/landsat-l8-valid.json) - A message containing a Landsat 9 scene over ice that *should* be processed. * [`landsat-l9-wrong-tier.json`](tests/integration/landsat-l9-wrong-tier.json) - A message containing a Landsat 9 scene *not* over ice that should be *filtered out* and *not* processed. To submit **all** the integration test payloads to the default test SNS topics, run: ```shell make integration ``` >[!IMPORTANT] > The integration tests will submit jobs to `hyp3-its-live-test`, which will publish products to `s3://its-live-test-data`. Notably `s3://its-live-test-data` has a lifecycle rule which will delete all products after 14 days. So to test deduplication of HyP3 and S3, you'll need to: > 1. disable `hyp3-its-live-test`'s compute environment or start execution worker > 2. submit the integration tests and see jobs submitted > 3. submit the integration tests again to see _all_ jobs deduplicate with the hung jobs from the previous step > 4. re-enable the compute environment or start execution worker and wait for all jobs to finish > 5. once all jobs are finished, submit the integration tests again to see jobs deduplicate against the products in `s3://its-live-data-test` > > That means fully testing its-live-monitoring requires _at least_ 3 rounds of integration testing!
+ + + +To submit _just_ the Landsat integration test payloads to the default Landsat test SNS topic, run: ```shell make landsat-integration ``` -Likewise, to submit the Sentinel-2 integration test payloads to the default Sentinel-2 test SNS topic, run: +Likewise, to submit _just_ the Sentinel-2 integration test payloads to the default Sentinel-2 test SNS topic, run: ```shell make Sentinel2-integration ``` -To submit **all** the integration test payloads to the default test SNS topic, run: -```shell -make integration -``` or, you can submit to an alternative SNS topic like: ```shell LANDSAT_TOPIC_ARN=foobar make landsat-integration diff --git a/cloudformation.yml b/cloudformation.yml index edb2e8f4..0314beaf 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -150,6 +150,13 @@ Resources: - Effect: Allow Action: dynamodb:Query Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${Hyp3JobsTable}*" + - Effect: Allow + Action: + - s3:List* + - s3:Get* + Resource: + - !Sub "arn:aws:s3:::${PublishBucket}/*" + - !Sub "arn:aws:s3:::${PublishBucket}" StatusMessages: Type: AWS::CloudFormation::Stack diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 7e19fee3..8a3a4916 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -8,7 +8,6 @@ from datetime import datetime, timezone import boto3 -import botocore.config import geopandas as gpd import hyp3_sdk as sdk import numpy as np @@ -40,10 +39,7 @@ log = logging.getLogger('its_live_monitoring') log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) -s3 = boto3.client( - 's3', - config=botocore.config.Config(signature_version=botocore.UNSIGNED), -) +s3 = boto3.client('s3') dynamo = boto3.resource('dynamodb') @@ -87,7 +83,7 @@ def get_key(tile_prefixes: list[str], reference: str, secondary: str) -> str | N for tile_prefix in tile_prefixes: prefix = f'{tile_prefix}/{reference}_X_{secondary}' response = s3.list_objects_v2( - Bucket='its-live-data', + Bucket=os.environ.get('PUBLISH_BUCKET', 'its-live-data'), Prefix=prefix, ) for item in response.get('Contents', []): From 0ab47c55fd90efd3371e4ccad9cf8f2f621014a4 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 22:09:48 -0900 Subject: [PATCH 38/45] fix typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b012c9e4..b088d1d2 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ make integration ``` >[!IMPORTANT] -> The integration tests will submit jobs to `hyp3-its-live-test`, which will publish products to `s3://its-live-test-data`. Notably `s3://its-live-test-data` has a lifecycle rule which will delete all products after 14 days. So to test deduplication of HyP3 and S3, you'll need to: +> The integration tests will submit jobs to `hyp3-its-live-test`, which will publish products to `s3://its-live-data-test`. Notably `s3://its-live-data-test` has a lifecycle rule which will delete all products after 14 days. So to test deduplication of HyP3 and S3, you'll need to: > 1. disable `hyp3-its-live-test`'s compute environment or start execution worker > 2. submit the integration tests and see jobs submitted > 3. 
submit the integration tests again to see _all_ jobs deduplicate with the hung jobs from the previous step From 2b1f6a28b0d9f34c52084eaee3460fcb5d85beda Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Fri, 22 Nov 2024 22:41:38 -0900 Subject: [PATCH 39/45] Use correct username in when querying dynamodb; only dedup if there are pairs --- its_live_monitoring/src/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 8a3a4916..72b8f8ed 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -178,13 +178,11 @@ def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: Returns: The pairs GeoDataFrame with any already submitted pairs removed. """ - its_live_user = 'hyp3.its_live' - pending_jobs = query_jobs_by_status_code( - 'PENDING', its_live_user, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition + 'PENDING', EARTHDATA_USERNAME, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition ) running_jobs = query_jobs_by_status_code( - 'RUNNING', its_live_user, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition + 'RUNNING', EARTHDATA_USERNAME, pairs.iloc[0].reference, pairs.iloc[0].reference_acquisition ) jobs = pending_jobs + running_jobs @@ -260,6 +258,7 @@ def process_scene( with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None): log.debug(pairs.sort_values(by=['secondary'], ascending=False).loc[:, ['reference', 'secondary']]) + if len(pairs) > 0: pairs = deduplicate_s3_pairs(pairs) log.info(f'Deduplicated already published pairs; {len(pairs)} remaining') From 1c8de568cc62df5adc5e75f4c05316430fd1d73b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:55:50 +0000 Subject: [PATCH 40/45] Bump actions/setup-python from 3 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 3 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v3...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/deploy-prod.yml | 2 +- .github/workflows/deploy-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 6ca292e6..c88c016e 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -21,7 +21,7 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-2 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: 3.12 diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 4925a39b..3bed6548 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -21,7 +21,7 @@ jobs: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-2 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: python-version: 3.12 From 807acd7c442358c08a17244ddc6b14831544c050 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:55:53 +0000 Subject: [PATCH 41/45] Bump aws-actions/configure-aws-credentials from 1 to 4 Bumps [aws-actions/configure-aws-credentials](https://github.com/aws-actions/configure-aws-credentials) from 1 to 4. - [Release notes](https://github.com/aws-actions/configure-aws-credentials/releases) - [Changelog](https://github.com/aws-actions/configure-aws-credentials/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-actions/configure-aws-credentials/compare/v1...v4) --- updated-dependencies: - dependency-name: aws-actions/configure-aws-credentials dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/deploy-prod.yml | 2 +- .github/workflows/deploy-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 6ca292e6..36808c96 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 4925a39b..47cd6828 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: aws-actions/configure-aws-credentials@v1 + - uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} From 2e8e74d59bef81a4956c89fa4ec93794c155f43e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:56:00 +0000 Subject: [PATCH 42/45] Bump cfn-lint from 1.19.0 to 1.20.0 Bumps [cfn-lint](https://github.com/aws-cloudformation/cfn-lint) from 1.19.0 to 1.20.0. 
- [Release notes](https://github.com/aws-cloudformation/cfn-lint/releases) - [Changelog](https://github.com/aws-cloudformation/cfn-lint/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-cloudformation/cfn-lint/compare/v1.19.0...v1.20.0) --- updated-dependencies: - dependency-name: cfn-lint dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 762c0021..2c0e3e6a 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -1,6 +1,6 @@ -r requirements-its_live_monitoring.txt -r requirements-status-messages.txt -cfn-lint==1.19.0 +cfn-lint==1.20.0 ruff==0.7.4 pytest==8.3.3 responses==0.25.3 From 0d8bf09a1238bd7301da454d69c87fbf0805b5ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 20:07:14 +0000 Subject: [PATCH 43/45] Bump ruff from 0.7.4 to 0.8.0 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.7.4 to 0.8.0. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/0.7.4...0.8.0) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-all.txt b/requirements-all.txt index 2c0e3e6a..d6bc1481 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -1,7 +1,7 @@ -r requirements-its_live_monitoring.txt -r requirements-status-messages.txt cfn-lint==1.20.0 -ruff==0.7.4 +ruff==0.8.0 pytest==8.3.3 responses==0.25.3 moto==5.0.21 From b61d958ae3cf47f712b7c3229d71e7533b7c7bd0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 25 Nov 2024 13:51:15 -0900 Subject: [PATCH 44/45] indent yaml list items --- cloudformation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudformation.yml b/cloudformation.yml index 0314beaf..6cb86920 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -155,8 +155,8 @@ Resources: - s3:List* - s3:Get* Resource: - - !Sub "arn:aws:s3:::${PublishBucket}/*" - - !Sub "arn:aws:s3:::${PublishBucket}" + - !Sub "arn:aws:s3:::${PublishBucket}/*" + - !Sub "arn:aws:s3:::${PublishBucket}" StatusMessages: Type: AWS::CloudFormation::Stack From 330f7aa0a27e760554f54b1542c9526239c31e2e Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Mon, 25 Nov 2024 13:59:00 -0900 Subject: [PATCH 45/45] start execution worker -> manager --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b088d1d2..173770af 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,10 @@ make integration >[!IMPORTANT] > The integration tests will submit jobs to `hyp3-its-live-test`, which will publish products to `s3://its-live-data-test`. Notably `s3://its-live-data-test` has a lifecycle rule which will delete all products after 14 days. So to test deduplication of HyP3 and S3, you'll need to: -> 1. disable `hyp3-its-live-test`'s compute environment or start execution worker +> 1. disable `hyp3-its-live-test`'s compute environment or start execution manager > 2. submit the integration tests and see jobs submitted > 3. 
submit the integration tests again to see _all_ jobs deduplicate with the hung jobs from the previous step -> 4. re-enable the compute environment or start execution worker and wait for all jobs to finish +> 4. re-enable the compute environment or start execution manager and wait for all jobs to finish > 5. once all jobs are finished, submit the integration tests again to see jobs deduplicate against the products in `s3://its-live-data-test` > > That means fully testing its-live-monitoring requires _at least_ 3 rounds of integration testing!
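
Below is an illustrative sketch (not part of the patch series above) of exercising the kind of jobs-table lookup these patches wire up, where the monitoring Lambda is granted `dynamodb:Query` on the `Hyp3JobsTable` and deduplicates against PENDING/RUNNING jobs via `query_jobs_by_status_code`, against a locally mocked table using the `moto` package added to `requirements-all.txt`. The table name, the `status_code` index and its key schema, and all example values below are assumptions made for this sketch, not the real HyP3 jobs table definition.

```python
import os
from datetime import datetime, timezone

import boto3
from boto3.dynamodb.conditions import Attr, Key
from moto import mock_aws


@mock_aws
def exercise_status_code_query() -> int:
    # moto intercepts the AWS calls, but botocore still expects credentials and a region.
    os.environ.update(
        AWS_ACCESS_KEY_ID='testing',
        AWS_SECRET_ACCESS_KEY='testing',
        AWS_DEFAULT_REGION='us-west-2',
        JOBS_TABLE_NAME='hyp3-its-live-test-JobsTable-mock',  # assumed table name
    )
    dynamo = boto3.resource('dynamodb')
    table = dynamo.create_table(
        TableName=os.environ['JOBS_TABLE_NAME'],
        AttributeDefinitions=[
            {'AttributeName': 'job_id', 'AttributeType': 'S'},
            {'AttributeName': 'status_code', 'AttributeType': 'S'},
            {'AttributeName': 'request_time', 'AttributeType': 'S'},
        ],
        KeySchema=[{'AttributeName': 'job_id', 'KeyType': 'HASH'}],
        GlobalSecondaryIndexes=[
            {
                'IndexName': 'status_code',  # assumed GSI name and key schema
                'KeySchema': [
                    {'AttributeName': 'status_code', 'KeyType': 'HASH'},
                    {'AttributeName': 'request_time', 'KeyType': 'RANGE'},
                ],
                'Projection': {'ProjectionType': 'ALL'},
            },
        ],
        BillingMode='PAY_PER_REQUEST',
    )
    # One fake PENDING job, shaped like the attributes the monitoring query filters on.
    table.put_item(
        Item={
            'job_id': '0000',
            'status_code': 'PENDING',
            'user_id': 'some-earthdata-user',
            'name': 'LC09_L1TP_138041_20240101_20240101_02_T1',  # example value only
            'request_time': datetime.now(timezone.utc).isoformat(timespec='seconds'),
        }
    )
    # Query the index by status code and narrow with a filter, the same general
    # shape as the deduplication lookup the Lambda runs against the jobs table.
    response = table.query(
        IndexName='status_code',
        KeyConditionExpression=Key('status_code').eq('PENDING'),
        FilterExpression=Attr('user_id').eq('some-earthdata-user') & Attr('name').exists(),
        ScanIndexForward=False,
    )
    return len(response['Items'])


if __name__ == '__main__':
    print(exercise_status_code_query())  # expected to print: 1
```

If those assumptions hold, running the sketch prints `1`: one pending job of the kind the deduplication step would treat as an already-submitted pair.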