diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py
index cb28b8892e8..4dea3a2fdea 100644
--- a/aws_lambda_powertools/utilities/data_masking/base.py
+++ b/aws_lambda_powertools/utilities/data_masking/base.py
@@ -38,9 +38,9 @@ def lambda_handler(event, context):
"sensitive": "password"
}
- masked = masker.erase(data,fields=["sensitive"])
+ erased = masker.erase(data,fields=["sensitive"])
- return masked
+ return erased
```
"""
@@ -180,7 +180,7 @@ def _apply_action_to_fields(
) -> Union[dict, str]:
"""
This method takes the input data, which can be either a dictionary or a JSON string,
- and applies a mask, an encryption, or a decryption to the specified fields.
+ and erases, encrypts, or decrypts the specified fields.
Parameters
----------
@@ -222,7 +222,6 @@ def _apply_action_to_fields(
new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}}
```
"""
- logger.debug('IN APPLY ACTION TO FIELDS!!')
data_parsed: dict = self._normalize_data_to_parse(fields, data)
diff --git a/aws_lambda_powertools/utilities/data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py
index 1f894c9c169..9f5a3974548 100644
--- a/aws_lambda_powertools/utilities/data_masking/provider/base.py
+++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py
@@ -68,7 +68,7 @@ def erase(self, data, **kwargs) -> Iterable[str]:
This method irreversibly erases data.
If the data to be erased is of type `str`, `dict`, or `bytes`,
- this method will return a masked string, i.e. "*****".
+ this method will return an erased string, i.e. "*****".
If the data to be erased is of an iterable type like `list`, `tuple`,
or `set`, this method will return a new object of the same type as the
diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md
index 31575098b1d..354b73f2549 100644
--- a/docs/utilities/data_masking.md
+++ b/docs/utilities/data_masking.md
@@ -5,7 +5,7 @@ description: Utility
-The data masking utility can encrypt, decrypt, or irreversibly mask sensitive information to protect data confidentiality.
+The data masking utility can encrypt, decrypt, or irreversibly erase sensitive information to protect data confidentiality.
```mermaid
stateDiagram-v2
@@ -14,18 +14,18 @@ stateDiagram-v2
DataMasking: DataMasking
Operation: Possible operations
Input: Sensitive value
- Mask: Mask
+ Erase: Erase
Encrypt: Encrypt
Decrypt: Decrypt
Provider: AWS Encryption SDK provider
- Result: Data transformed (masked, encrypted, or decrypted)
+ Result: Data transformed (erased, encrypted, or decrypted)
LambdaFn --> DataMasking
DataMasking --> Operation
state Operation {
[*] --> Input
- Input --> Mask: Irreversible
+ Input --> Erase: Irreversible
Input --> Encrypt
Input --> Decrypt
Encrypt --> Provider
@@ -37,13 +37,13 @@ stateDiagram-v2
## Key features
-* Encrypt, decrypt, or irreversibly mask data with ease
+* Encrypt, decrypt, or irreversibly erase data with ease
* Remove sensitive information in one or more fields within nested data
* Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"} for industry and AWS security best practices
## Terminology
-**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, hence why being irreversible.
+**Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, which is why it is irreversible.
**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel to decrypt it.
@@ -97,13 +97,13 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your
1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"}
2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"}
-### Masking data
+### Erasing data
-Masking will erase the original data and replace with `*****`. This means you cannot recover masked data, and its type will change to `str`.
+Erasing will remove the original data and replace it with `*****`. This means you cannot recover erased data, and its type will change to `str`.
-=== "getting_started_mask_data.py"
+=== "getting_started_erase_data.py"
```python hl_lines="4 8 17"
- --8<-- "examples/data_masking/src/getting_started_mask_data.py"
+ --8<-- "examples/data_masking/src/getting_started_erase_data.py"
```
1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter. If we omit `fields` parameter, the entire dictionary will be erased with `*****`.
@@ -113,9 +113,9 @@ Masking will erase the original data and replace with `*****`. This means you ca
--8<-- "examples/data_masking/src/generic_data_input.json"
```
-=== "getting_started_mask_data_output.json"
+=== "getting_started_erase_data_output.json"
```json hl_lines="5 7 12"
- --8<-- "examples/data_masking/src/getting_started_mask_data_output.json"
+ --8<-- "examples/data_masking/src/getting_started_erase_data_output.json"
```
### Encrypting data
@@ -208,14 +208,14 @@ For a stronger security posture, you can add metadata to each encryption operati
!!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)."
-You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields.
+You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields.
-When `fields` is present, `mask` and `encrypt` behave differently:
+When `fields` is present, `erase` and `encrypt` behave differently:
| Operation | Behavior | Example | Obfuscated |
| --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- |
| `encrypt` | Obfuscate entire data and replacing with ciphertext string. | `{"cards": ["a", "b"]}` | `{"cards": "ciphertext"}` |
-| `mask` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` |
+| `erase` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` |
Here are common scenarios to best visualize how to use `fields`.
@@ -439,9 +439,9 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY
The following sequence diagrams explain how `DataMasking` behaves under different scenarios.
-#### Mask operation
+#### Erase operation
-Masking operations occur in-memory and we cannot recover the original value.
+Erasing operations occur in-memory and we cannot recover the original value.
```mermaid
@@ -572,9 +572,9 @@ sequenceDiagram
## Testing your code
-### Testing mask operation
+### Testing erase operation
-Testing your code with a simple mask operation
+Testing your code with a simple erase operation
=== "test_lambda_mask.py"
```python hl_lines="22"
diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py
index 3a676f8ee0a..a3e9fc7217e 100644
--- a/examples/data_masking/src/getting_started_mask_data.py
+++ b/examples/data_masking/src/getting_started_mask_data.py
@@ -12,8 +12,8 @@
def lambda_handler(event: dict, context: LambdaContext) -> dict:
data: dict = event.get("body", {})
- logger.info("Masking fields email, address.street, and company_address")
+ logger.info("Erasing fields email, address.street, and company_address")
- masked = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)!
+ erased = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)!
- return masked
+ return erased
diff --git a/examples/data_masking/tests/lambda_mask.py b/examples/data_masking/tests/lambda_mask.py
index 339a771beab..6b2f461e663 100644
--- a/examples/data_masking/tests/lambda_mask.py
+++ b/examples/data_masking/tests/lambda_mask.py
@@ -9,6 +9,6 @@
def lambda_handler(event: dict, context: LambdaContext) -> dict:
data = event
- masked = data_masker.erase(data, fields=["testkey"])
+ erased = data_masker.erase(data, fields=["testkey"])
- return masked
+ return erased
diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py
index f1386465591..c1dfd22c6b9 100644
--- a/tests/functional/data_masking/test_aws_encryption_sdk.py
+++ b/tests/functional/data_masking/test_aws_encryption_sdk.py
@@ -46,67 +46,67 @@ def data_masker(monkeypatch) -> DataMasking:
return DataMasking(provider=provider)
-def test_mask_int(data_masker):
+def test_erase_int(data_masker):
# GIVEN an int data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(42)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(42)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_float(data_masker):
+def test_erase_float(data_masker):
# GIVEN a float data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(4.2)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(4.2)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_bool(data_masker):
+def test_erase_bool(data_masker):
# GIVEN a bool data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(True)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(True)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_none(data_masker):
+def test_erase_none(data_masker):
# GIVEN a None data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(None)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(None)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_str(data_masker):
+def test_erase_str(data_masker):
# GIVEN a str data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase("this is a string")
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase("this is a string")
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_list(data_masker):
+def test_erase_list(data_masker):
# GIVEN a list data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase([1, 2, "string", 3])
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase([1, 2, "string", 3])
# THEN the result is the data masked, while maintaining type list
- assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]
+ assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]
-def test_mask_dict(data_masker):
+def test_erase_dict(data_masker):
# GIVEN a dict data type
data = {
"a": {
@@ -115,14 +115,14 @@ def test_mask_dict(data_masker):
},
}
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(data)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(data)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_dict_with_fields(data_masker):
+def test_erase_dict_with_fields(data_masker):
# GIVEN a dict data type
data = {
"a": {
@@ -131,11 +131,11 @@ def test_mask_dict_with_fields(data_masker):
},
}
- # WHEN mask is called with a list of fields specified
- masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
+ # WHEN erase is called with a list of fields specified
+ erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
# THEN the result is only the specified fields are masked
- assert masked_string == {
+ assert erased_string == {
"a": {
"1": {"None": DATA_MASKING_STRING, "four": "world"},
"b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}},
@@ -143,7 +143,7 @@ def test_mask_dict_with_fields(data_masker):
}
-def test_mask_json_dict_with_fields(data_masker):
+def test_erase_json_dict_with_fields(data_masker):
# GIVEN the data type is a json representation of a dictionary
data = json.dumps(
{
@@ -154,7 +154,7 @@ def test_mask_json_dict_with_fields(data_masker):
},
)
- # WHEN mask is called with a list of fields specified
+ # WHEN erase is called with a list of fields specified
masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
# THEN the result is only the specified fields are masked
diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py
index 65f1d892646..668da32a6e9 100644
--- a/tests/performance/data_masking/test_perf_data_masking.py
+++ b/tests/performance/data_masking/test_perf_data_masking.py
@@ -55,7 +55,7 @@ def test_data_masking_init(benchmark):
pytest.fail(f"High level imports should be below {DATA_MASKING_INIT_SLA}s: {stat}")
-def mask_json_blob():
+def erase_json_blob():
data_masker = DataMasking()
data_masker.erase(json_blob, json_blob_fields)
@@ -63,7 +63,7 @@ def mask_json_blob():
@pytest.mark.perf
@pytest.mark.benchmark(group="core", disable_gc=True, warmup=False)
def test_data_masking_encrypt_with_json_blob(benchmark):
- benchmark.pedantic(mask_json_blob)
+ benchmark.pedantic(erase_json_blob)
stat = benchmark.stats.stats.max
if stat > DATA_MASKING_NESTED_ENCRYPT_SLA:
pytest.fail(f"High level imports should be below {DATA_MASKING_NESTED_ENCRYPT_SLA}s: {stat}")
diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py
index 350c9f01eb0..4fbbc188ceb 100644
--- a/tests/unit/data_masking/test_unit_data_masking.py
+++ b/tests/unit/data_masking/test_unit_data_masking.py
@@ -15,67 +15,67 @@ def data_masker() -> DataMasking:
return DataMasking()
-def test_mask_int(data_masker):
+def test_erase_int(data_masker):
# GIVEN an int data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(42)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(42)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_float(data_masker):
+def test_erase_float(data_masker):
# GIVEN a float data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(4.2)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(4.2)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_bool(data_masker):
+def test_erase_bool(data_masker):
# GIVEN a bool data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(True)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(True)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_none(data_masker):
+def test_erase_none(data_masker):
# GIVEN a None data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(None)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(None)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_str(data_masker):
+def test_erase_str(data_masker):
# GIVEN a str data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase("this is a string")
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase("this is a string")
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_list(data_masker):
+def test_erase_list(data_masker):
# GIVEN a list data type
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase([1, 2, "string", 3])
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase([1, 2, "string", 3])
# THEN the result is the data masked, while maintaining type list
- assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]
+ assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]
-def test_mask_dict(data_masker):
+def test_erase_dict(data_masker):
# GIVEN a dict data type
data = {
"a": {
@@ -84,14 +84,14 @@ def test_mask_dict(data_masker):
},
}
- # WHEN mask is called with no fields argument
- masked_string = data_masker.erase(data)
+ # WHEN erase is called with no fields argument
+ erased_string = data_masker.erase(data)
# THEN the result is the data masked
- assert masked_string == DATA_MASKING_STRING
+ assert erased_string == DATA_MASKING_STRING
-def test_mask_dict_with_fields(data_masker):
+def test_erase_dict_with_fields(data_masker):
# GIVEN a dict data type
data = {
"a": {
@@ -100,11 +100,11 @@ def test_mask_dict_with_fields(data_masker):
},
}
- # WHEN mask is called with a list of fields specified
- masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
+ # WHEN erase is called with a list of fields specified
+ erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
- # THEN the result is only the specified fields are masked
- assert masked_string == {
+ # THEN the result is that only the specified fields are erased
+ assert erased_string == {
"a": {
"1": {"None": DATA_MASKING_STRING, "four": "world"},
"b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}},
@@ -112,7 +112,7 @@ def test_mask_dict_with_fields(data_masker):
}
-def test_mask_json_dict_with_fields(data_masker):
+def test_erase_json_dict_with_fields(data_masker):
# GIVEN the data type is a json representation of a dictionary
data = json.dumps(
{
@@ -123,10 +123,10 @@ def test_mask_json_dict_with_fields(data_masker):
},
)
- # WHEN mask is called with a list of fields specified
+ # WHEN erase is called with a list of fields specified
masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"])
- # THEN the result is only the specified fields are masked
+ # THEN the result is that only the specified fields are erased
assert masked_json_string == {
"a": {
"1": {"None": DATA_MASKING_STRING, "four": "world"},
@@ -199,9 +199,9 @@ def test_parsing_nonexistent_fields_warning_on_missing_field():
},
}
- # WHEN mask is called with a non-existing field
+ # WHEN erase is called with a non-existing field
with pytest.warns(UserWarning, match="Field or expression*"):
masked_json_string = data_masker.erase(data, fields=["non-existing"])
- # THEN the "masked" payload is the same of the original
+ # THEN the "erased" payload is the same as the original
assert masked_json_string == data