diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index cb28b8892e8..4dea3a2fdea 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -38,9 +38,9 @@ def lambda_handler(event, context): "sensitive": "password" } - masked = masker.erase(data,fields=["sensitive"]) + erased = masker.erase(data,fields=["sensitive"]) - return masked + return erased ``` """ @@ -180,7 +180,7 @@ def _apply_action_to_fields( ) -> Union[dict, str]: """ This method takes the input data, which can be either a dictionary or a JSON string, - and applies a mask, an encryption, or a decryption to the specified fields. + and erases, encrypts, or decrypts the specified fields. Parameters ---------- @@ -222,7 +222,6 @@ def _apply_action_to_fields( new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}} ``` """ - logger.debug('IN APPLY ACTION TO FIELDS!!') data_parsed: dict = self._normalize_data_to_parse(fields, data) diff --git a/aws_lambda_powertools/utilities/data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py index 1f894c9c169..9f5a3974548 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py @@ -68,7 +68,7 @@ def erase(self, data, **kwargs) -> Iterable[str]: This method irreversibly erases data. If the data to be erased is of type `str`, `dict`, or `bytes`, - this method will return a masked string, i.e. "*****". + this method will return an erased string, i.e. "*****". 
If the data to be erased is of an iterable type like `list`, `tuple`, or `set`, this method will return a new object of the same type as the diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 31575098b1d..354b73f2549 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -5,7 +5,7 @@ description: Utility -The data masking utility can encrypt, decrypt, or irreversibly mask sensitive information to protect data confidentiality. +The data masking utility can encrypt, decrypt, or irreversibly erase sensitive information to protect data confidentiality. ```mermaid stateDiagram-v2 @@ -14,18 +14,18 @@ stateDiagram-v2 DataMasking: DataMasking Operation: Possible operations Input: Sensitive value - Mask: Mask + Erase: Erase Encrypt: Encrypt Decrypt: Decrypt Provider: AWS Encryption SDK provider - Result: Data transformed (masked, encrypted, or decrypted) + Result: Data transformed (erased, encrypted, or decrypted) LambdaFn --> DataMasking DataMasking --> Operation state Operation { [*] --> Input - Input --> Mask: Irreversible + Input --> Erase: Irreversible Input --> Encrypt Input --> Decrypt Encrypt --> Provider @@ -37,13 +37,13 @@ stateDiagram-v2 ## Key features -* Encrypt, decrypt, or irreversibly mask data with ease +* Encrypt, decrypt, or irreversibly erase data with ease * Remove sensitive information in one or more fields within nested data * Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"} for industry and AWS security best practices ## Terminology -**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, hence why being irreversible. +**Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, which is why it is irreversible.
**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel to decrypt it. @@ -97,13 +97,13 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} 2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} -### Masking data +### Erasing data -Masking will erase the original data and replace with `*****`. This means you cannot recover masked data, and its type will change to `str`. +Erasing will remove the original data and replace it with `*****`. This means you cannot recover erased data, and its type will change to `str`. -=== "getting_started_mask_data.py" +=== "getting_started_erase_data.py" ```python hl_lines="4 8 17" - --8<-- "examples/data_masking/src/getting_started_mask_data.py" + --8<-- "examples/data_masking/src/getting_started_erase_data.py" ``` 1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. @@ -113,9 +113,9 @@ Masking will erase the original data and replace with `*****`. This means you ca --8<-- "examples/data_masking/src/generic_data_input.json" ``` -=== "getting_started_mask_data_output.json" +=== "getting_started_erase_data_output.json" ```json hl_lines="5 7 12" - --8<-- "examples/data_masking/src/getting_started_mask_data_output.json" + --8<-- "examples/data_masking/src/getting_started_erase_data_output.json" ``` ### Encrypting data @@ -208,14 +208,14 @@ For a stronger security posture, you can add metadata to each encryption operati !!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." -You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. +You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. -When `fields` is present, `mask` and `encrypt` behave differently: +When `fields` is present, `erase` and `encrypt` behave differently: | Operation | Behavior | Example | Obfuscated | | --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- | | `encrypt` | Obfuscate entire data and replacing with ciphertext string. | `{"cards": ["a", "b"]}` | `{"cards": "ciphertext"}` | -| `mask` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | +| `erase` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | Here are common scenarios to best visualize how to use `fields`. 
@@ -439,9 +439,9 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY The following sequence diagrams explain how `DataMasking` behaves under different scenarios. -#### Mask operation +#### Erase operation -Masking operations occur in-memory and we cannot recover the original value. +Erasing operations occur in-memory and we cannot recover the original value.
```mermaid @@ -572,9 +572,9 @@ sequenceDiagram ## Testing your code -### Testing mask operation +### Testing erase operation -Testing your code with a simple mask operation +Testing your code with a simple erase operation === "test_lambda_mask.py" ```python hl_lines="22" diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 3a676f8ee0a..a3e9fc7217e 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -12,8 +12,8 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data: dict = event.get("body", {}) - logger.info("Masking fields email, address.street, and company_address") + logger.info("Erasing fields email, address.street, and company_address") - masked = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)! + erased = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)! 
- return masked + return erased diff --git a/examples/data_masking/tests/lambda_mask.py b/examples/data_masking/tests/lambda_mask.py index 339a771beab..6b2f461e663 100644 --- a/examples/data_masking/tests/lambda_mask.py +++ b/examples/data_masking/tests/lambda_mask.py @@ -9,6 +9,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event - masked = data_masker.erase(data, fields=["testkey"]) + erased = data_masker.erase(data, fields=["testkey"]) - return masked + return erased diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index f1386465591..c1dfd22c6b9 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -46,67 +46,67 @@ def data_masker(monkeypatch) -> DataMasking: return DataMasking(provider=provider) -def test_mask_int(data_masker): +def test_erase_int(data_masker): # GIVEN an int data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(42) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(42) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_float(data_masker): +def test_erase_float(data_masker): # GIVEN a float data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(4.2) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(4.2) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_bool(data_masker): +def test_erase_bool(data_masker): # GIVEN a bool data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(True) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(True) # THEN the result is 
the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_none(data_masker): +def test_erase_none(data_masker): # GIVEN a None data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(None) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(None) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_str(data_masker): +def test_erase_str(data_masker): # GIVEN a str data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase("this is a string") + # WHEN erase is called with no fields argument + erased_string = data_masker.erase("this is a string") # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_list(data_masker): +def test_erase_list(data_masker): # GIVEN a list data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase([1, 2, "string", 3]) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list - assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] + assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] -def test_mask_dict(data_masker): +def test_erase_dict(data_masker): # GIVEN a dict data type data = { "a": { @@ -115,14 +115,14 @@ def test_mask_dict(data_masker): }, } - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(data) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(data) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == 
DATA_MASKING_STRING -def test_mask_dict_with_fields(data_masker): +def test_erase_dict_with_fields(data_masker): # GIVEN a dict data type data = { "a": { @@ -131,11 +131,11 @@ def test_mask_dict_with_fields(data_masker): }, } - # WHEN mask is called with a list of fields specified - masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) + # WHEN erase is called with a list of fields specified + erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked - assert masked_string == { + assert erased_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}, @@ -143,7 +143,7 @@ def test_mask_dict_with_fields(data_masker): } -def test_mask_json_dict_with_fields(data_masker): +def test_erase_json_dict_with_fields(data_masker): # GIVEN the data type is a json representation of a dictionary data = json.dumps( { @@ -154,7 +154,7 @@ def test_mask_json_dict_with_fields(data_masker): }, ) - # WHEN mask is called with a list of fields specified + # WHEN erase is called with a list of fields specified masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 65f1d892646..668da32a6e9 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -55,7 +55,7 @@ def test_data_masking_init(benchmark): pytest.fail(f"High level imports should be below {DATA_MASKING_INIT_SLA}s: {stat}") -def mask_json_blob(): +def erase_json_blob(): data_masker = DataMasking() data_masker.erase(json_blob, json_blob_fields) @@ -63,7 +63,7 @@ def mask_json_blob(): @pytest.mark.perf @pytest.mark.benchmark(group="core", disable_gc=True, warmup=False) def 
test_data_masking_encrypt_with_json_blob(benchmark): - benchmark.pedantic(mask_json_blob) + benchmark.pedantic(erase_json_blob) stat = benchmark.stats.stats.max if stat > DATA_MASKING_NESTED_ENCRYPT_SLA: pytest.fail(f"High level imports should be below {DATA_MASKING_NESTED_ENCRYPT_SLA}s: {stat}") diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 350c9f01eb0..4fbbc188ceb 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -15,67 +15,67 @@ def data_masker() -> DataMasking: return DataMasking() -def test_mask_int(data_masker): +def test_erase_int(data_masker): # GIVEN an int data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(42) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(42) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_float(data_masker): +def test_erase_float(data_masker): # GIVEN a float data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(4.2) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(4.2) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_bool(data_masker): +def test_erase_bool(data_masker): # GIVEN a bool data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(True) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(True) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_none(data_masker): +def test_erase_none(data_masker): # GIVEN a None data type - # WHEN mask is called with no fields argument - 
masked_string = data_masker.erase(None) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(None) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_str(data_masker): +def test_erase_str(data_masker): # GIVEN a str data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase("this is a string") + # WHEN erase is called with no fields argument + erased_string = data_masker.erase("this is a string") # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_list(data_masker): +def test_erase_list(data_masker): # GIVEN a list data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase([1, 2, "string", 3]) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list - assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] + assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] -def test_mask_dict(data_masker): +def test_erase_dict(data_masker): # GIVEN a dict data type data = { "a": { @@ -84,14 +84,14 @@ def test_mask_dict(data_masker): }, } - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(data) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(data) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_dict_with_fields(data_masker): +def test_erase_dict_with_fields(data_masker): # GIVEN a dict data type data = { "a": { @@ -100,11 +100,11 @@ def test_mask_dict_with_fields(data_masker): }, } - # WHEN mask is called with a 
list of fields specified - masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) + # WHEN erase is called with a list of fields specified + erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - # THEN the result is only the specified fields are masked - assert masked_string == { + # THEN only the specified fields are erased + assert erased_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}, @@ -112,7 +112,7 @@ def test_mask_dict_with_fields(data_masker): } -def test_mask_json_dict_with_fields(data_masker): +def test_erase_json_dict_with_fields(data_masker): # GIVEN the data type is a json representation of a dictionary data = json.dumps( { @@ -123,10 +123,10 @@ def test_mask_json_dict_with_fields(data_masker): }, ) - # WHEN mask is called with a list of fields specified + # WHEN erase is called with a list of fields specified masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - # THEN the result is only the specified fields are masked + # THEN only the specified fields are erased assert masked_json_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, @@ -199,9 +199,9 @@ def test_parsing_nonexistent_fields_warning_on_missing_field(): }, } - # WHEN mask is called with a non-existing field + # WHEN erase is called with a non-existing field with pytest.warns(UserWarning, match="Field or expression*"): masked_json_string = data_masker.erase(data, fields=["non-existing"]) - # THEN the "masked" payload is the same of the original + # THEN the "erased" payload is the same as the original assert masked_json_string == data