From 1fe6eed4f0b7ec5e0c262a074cee5c9bce7a142d Mon Sep 17 00:00:00 2001 From: BethanyG Date: Tue, 28 Mar 2023 16:24:43 -0700 Subject: [PATCH 1/2] First draft of approaches for ETL exercise. --- .../practice/etl/.approaches/config.json | 43 +++++ .../dict-constructor-and-generator/content.md | 10 ++ .../snippet.txt | 6 + .../etl/.approaches/dict-items/content.md | 11 ++ .../etl/.approaches/dict-items/snippet.txt | 7 + .../dict-keys-and-dict-methods/content.md | 12 ++ .../dict-keys-and-dict-methods/snippet.txt | 7 + .../dict-keys-and-generator/content.md | 12 ++ .../dict-keys-and-generator/snippet.txt | 8 + .../dictionary-comprehension/content.md | 8 + .../dictionary-comprehension/snippet.txt | 4 + .../practice/etl/.approaches/introduction.md | 167 ++++++++++++++++++ 12 files changed, 295 insertions(+) create mode 100644 exercises/practice/etl/.approaches/config.json create mode 100644 exercises/practice/etl/.approaches/dict-constructor-and-generator/content.md create mode 100644 exercises/practice/etl/.approaches/dict-constructor-and-generator/snippet.txt create mode 100644 exercises/practice/etl/.approaches/dict-items/content.md create mode 100644 exercises/practice/etl/.approaches/dict-items/snippet.txt create mode 100644 exercises/practice/etl/.approaches/dict-keys-and-dict-methods/content.md create mode 100644 exercises/practice/etl/.approaches/dict-keys-and-dict-methods/snippet.txt create mode 100644 exercises/practice/etl/.approaches/dict-keys-and-generator/content.md create mode 100644 exercises/practice/etl/.approaches/dict-keys-and-generator/snippet.txt create mode 100644 exercises/practice/etl/.approaches/dictionary-comprehension/content.md create mode 100644 exercises/practice/etl/.approaches/dictionary-comprehension/snippet.txt create mode 100644 exercises/practice/etl/.approaches/introduction.md diff --git a/exercises/practice/etl/.approaches/config.json b/exercises/practice/etl/.approaches/config.json new file mode 100644 index 0000000000..bd6bdfaeb1 
--- /dev/null +++ b/exercises/practice/etl/.approaches/config.json @@ -0,0 +1,43 @@ +{ + "introduction": { + "authors": ["BethanyG"], + "contributors": [] + }, + "approaches": [ + { + "uuid": "0a408f3f-d1ea-4739-a900-dbb65ab34520", + "slug": "dict-keys-and-generator", + "title": "Iterate Over Dictionary Keys and Use a Generator to Lowercase Strings.", + "blurb": "Use dict.keys() to iterate, and a generator to lowercase values.", + "authors": ["BethanyG"] + }, + { + "uuid": "a35a1496-b092-4634-a514-2c02d7c899c5", + "slug": "dict-keys-and-dict-methods", + "title": "Iterate Over Dictionary Keys and Use Dictionary Methods to Update.", + "blurb": "Use dict.keys() for iteration and dict.get() with dict.setdefault() to update.", + "authors": ["BethanyG"] + }, + { + "uuid": "c8de8d53-154c-4f05-ba44-44d8fcd739aa", + "slug": "dict-items ", + "title": "Iterate over Dictionary Items", + "blurb": "Use dict.items() for iteration.", + "authors": ["BethanyG"] + }, + { + "uuid": "5105b287-5062-4404-81df-0afe865315da", + "slug": "dict-constructor-and-generator", + "title": "Dictionary Constructor with a Passed Generator Expression.", + "blurb": "Pass a generator expression to a dictionary constructor to create a new dictionary.", + "authors": ["BethanyG"] + }, + { + "uuid": "59fad251-66ca-4f7d-a466-3bbd19260849", + "slug": "dictionary-comprehension", + "title": "Dictionary Comprehension", + "blurb": "Use a dictionary comprehension to process and transform data into a new dictionary.", + "authors": ["BethanyG"] + } + ] +} \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-constructor-and-generator/content.md b/exercises/practice/etl/.approaches/dict-constructor-and-generator/content.md new file mode 100644 index 0000000000..fb9bcbfcde --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-constructor-and-generator/content.md @@ -0,0 +1,10 @@ +# Dictionary Constructor with Generator Expression + +```python +def transform(legacy_data): + new_data = 
dict((letter.lower(), score) + for score, tiles in + legacy_data.items() + for letter in tiles) + return new_data +``` \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-constructor-and-generator/snippet.txt b/exercises/practice/etl/.approaches/dict-constructor-and-generator/snippet.txt new file mode 100644 index 0000000000..aee81d51a2 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-constructor-and-generator/snippet.txt @@ -0,0 +1,6 @@ +def transform(legacy_data): + new_data = dict((letter.lower(), score) + for score, tiles in + legacy_data.items() + for letter in tiles) + return new_data \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-items/content.md b/exercises/practice/etl/.approaches/dict-items/content.md new file mode 100644 index 0000000000..fd00efad85 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-items/content.md @@ -0,0 +1,11 @@ +# Iterate over Dictionary Items + +```python +def transform(input_dict): + new_data = {} + + for key, value in input_dict.items(): + for item in value: + new_data[item.lower()] = key + return new_data +``` \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-items/snippet.txt b/exercises/practice/etl/.approaches/dict-items/snippet.txt new file mode 100644 index 0000000000..8e91307024 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-items/snippet.txt @@ -0,0 +1,7 @@ +def transform(input_dict): + new_data = {} + + for key, value in input_dict.items(): + for item in value: + new_data[item.lower()] = key + return new_data \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/content.md b/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/content.md new file mode 100644 index 0000000000..f307486db4 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/content.md @@ -0,0 +1,12 @@ +# Iterate Over Dictionary Keys and Use 
Dictionary Methods + + +```python +def transform(input_data): + transformed = {} + + for key in input_data: + for value in input_data.get(key): + transformed.setdefault(value.lower(), key) + return transformed +``` \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/snippet.txt b/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/snippet.txt new file mode 100644 index 0000000000..e9b7607e46 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-keys-and-dict-methods/snippet.txt @@ -0,0 +1,7 @@ +def transform(input_data): + transformed = {} + + for key in input_data: + for value in input_data.get(key): + transformed.setdefault(value.lower(), key) + return transformed \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-keys-and-generator/content.md b/exercises/practice/etl/.approaches/dict-keys-and-generator/content.md new file mode 100644 index 0000000000..b7fa0777b1 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-keys-and-generator/content.md @@ -0,0 +1,12 @@ +# Iterate over Dictionary Keys and Process Values in a Generator + +```python +def transform(input_dict): + result = {} + + for key in input_dict: + values = (item.lower() for item in input_dict[key]) + for value in values: + result[value] = key + return result +``` \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dict-keys-and-generator/snippet.txt b/exercises/practice/etl/.approaches/dict-keys-and-generator/snippet.txt new file mode 100644 index 0000000000..421f598b41 --- /dev/null +++ b/exercises/practice/etl/.approaches/dict-keys-and-generator/snippet.txt @@ -0,0 +1,8 @@ +def transform(input_dict): + result = {} + + for key in input_dict: + values = (item.lower() for item in input_dict[key]) + for value in values: + result[value] = key + return result \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dictionary-comprehension/content.md 
b/exercises/practice/etl/.approaches/dictionary-comprehension/content.md new file mode 100644 index 0000000000..9761277e61 --- /dev/null +++ b/exercises/practice/etl/.approaches/dictionary-comprehension/content.md @@ -0,0 +1,8 @@ +# Dictionary Comprehension + +```python +def transform(input_dict): + return {value.lower():key for key in + input_dict for + value in input_dict[key]} +``` \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/dictionary-comprehension/snippet.txt b/exercises/practice/etl/.approaches/dictionary-comprehension/snippet.txt new file mode 100644 index 0000000000..2fc81f1d56 --- /dev/null +++ b/exercises/practice/etl/.approaches/dictionary-comprehension/snippet.txt @@ -0,0 +1,4 @@ +def transform(input_dict): + return {value.lower():key for key in + input_dict for + value in input_dict[key]} \ No newline at end of file diff --git a/exercises/practice/etl/.approaches/introduction.md b/exercises/practice/etl/.approaches/introduction.md new file mode 100644 index 0000000000..546076df30 --- /dev/null +++ b/exercises/practice/etl/.approaches/introduction.md @@ -0,0 +1,167 @@ +# Introduction + +There are multiple Pythonic ways to solve the ETL exercise. +Among them are: + +- Iterate over `dict.keys()` & lowercase all values in a `generator expression` before inserting into the new dict. +- Iterate over `dict.keys()` & use `dict` methods `dict.get()` and `dict.setdefault()` to retrieve values and insert keys into new dict. +- Iterate over `dict.items()` and deal with lowercasing the values `list` in a nested loop. +- Use the `dict()` constructor with a `generator expression` to unpack and lowercase values in a nested loop. +- Use a dictionary comprehension. + + + +## General guidance + +The goal of the ETL exercise is to: + +* **E**xtract the data from the 'legacy' dictionary given as input. It has numeric **keys** with a `list` of uppercased strings as **values**. 
+* **T**ransform the data, by turning the `list` of **values** into individual lowercased **keys**, with the former **keys** used as **values**. +* **L**oad the data into a new dictionary and return it. + + +The challenge here is to deal efficiently with lowercasing the **values**, which are `lists` containing strings. +Unfortunately, there is no way to avoid an extra loop for lowercasing the string values, so all current approaches to this exercise have equivalent performance. + +But there may be other considerations such as readability, or how to deal with duplicate data in **values** (_and whether that is necessary or not_) when selecting an approach. + +Additionally, while the test data for this exercise does not contain any [unhashable][unhashable] values, if this code were to be used in a situation where the legacy values were of an unknown datatype, measures would need to be taken to test the values before attempting to create keys with them. + + +## Approach: Iterate over `dict.keys()` & Lowercase values in a generator or list comprehension. + +```python +def transform(input_dict): + result = {} + + for key in input_dict: + values = (item.lower() for item in input_dict[key]) + for value in values: + result[value] = key + return result + + ##OR## + + def transform(input_dict): + result = {} + + for key in input_dict: + values = [item.lower() for item in input_dict[key]] + for value in values: + result[value] = key + return result +``` + + +This approach iterates over `dict.keys()`, converting all the strings in the returned values `list` to lowercase via `generator expression` or `list comprehension`. +Once the values are converted to lowercase, they are iterated through in an inner loop. +Each value is then inserted into the new dictionary as a key, with the 'old' key (_from the outer loop_) used as the value. +For more details, see the [dictionary keys and generator][dict-keys-and-generator] approach. 
+ + +## Approach: Use Dictionary Methods `dict.get()` and `dict.setdefault()` + +```python +def transform(input_data): + transformed = {} + + for key in input_data: + for value in input_data.get(key): + transformed.setdefault(value.lower(), key) + return transformed +``` + + +As with the approach described above, this iterates through the keys of `input_data`. +Each value `list` is looked up via `input_data.get(key)`, and the new dictionary (_transformed_) is updated via `dict.setdefault(value.lower(), key)`. +For details, read the [dictionary keys and dictionary methods][dict-keys-and-dict-methods] approach. + + +## Approach: Iterate over `dict.items()` + +```python +def transform(input_dict): + new_data = {} + + for key, value in input_dict.items(): + for item in value: + new_data[item.lower()] = key + return new_data +``` + + +This approach iterates over both keys and values via `dict.items()`. +The inner loop then iterates over the values `list`, transforming each string and inserting it into the `new_data` dictionary using _bracket notation_, with the lowercased string as key and the former key as the new value. +For more details, see the [dictionary items][dict-items] approach. + + +## Approach: Use a generator with the Dictionary Constructor + +```python +def transform(legacy_data): + new_data = dict((letter.lower(), score) + for score, tiles in + legacy_data.items() + for letter in tiles) + return new_data +``` + + +This approach encapsulates the loops described in prior approaches within a `generator expression`. +The generator includes a nested loop to iterate over the strings within the value `list`, lowercasing them. +The generator is then passed to the `dict()` constructor, which unpacks it and creates a new dictionary. +For more information, see the [dictionary constructor with generator][dict-constructor-and-generator] approach. 
+ + +## Approach: Use a Dictionary Comprehension + +```python +def transform(input_dict): + return {value.lower():key for key in + input_dict for + value in input_dict[key]} +``` + + + +This approach is very similar to the one above, but uses a `dictionary comprehension` format instead of a generator fed to a constructor. +For more details, see the [dictionary comprehension][dictionary-comprehension] approach. + + + +## Other approaches + +Besides these five idiomatic approaches, there are a multitude of possible variations using different string or dictionary methods or strategies for extracting and lowercasing the input dictionary values. + +The strategy below employs `zip_longest` with `dict.items()` to re-package keys and values. + +```python +from itertools import zip_longest + +def transform(input_dict): + + lowercased = (zip_longest([element.lower() for element in item], + key, fillvalue=key) for + key, item in input_dict.items()) + + return dict(lowercased) +``` + + +But note that it still has the nested loop all of these solutions share -- as the values returned by `dict.items()` still need to be unpacked and lowercased before anything can be added to the new dictionary. + + + +## Which approach to use? + +All of these approaches are roughly equivalent given that the values in the input dictionary are a list of strings that must be lowercased. +This demands that those values be looped through, making all strategies loop-within-loop. +Using generators or comprehensions might still give a slight performance boost, but they may also be harder to read or understand for others. 
+ + +[dict-constructor-and-generator]: https://exercism.org/tracks/python/exercises/etl/approaches/dict-constructor-and-generator +[dict-items]: https://exercism.org/tracks/python/exercises/etl/approaches/dict-items +[dict-keys-and-dict-methods]: https://exercism.org/tracks/python/exercises/etl/approaches/dict-keys-and-dict-methods +[dict-keys-and-generator]: https://exercism.org/tracks/python/exercises/etl/approaches/dict-keys-and-generator +[dictionary-comprehension]: https://exercism.org/tracks/python/exercises/etl/approaches/dictionary-comprehension +[unhashable]: https://docs.python.org/3/glossary.html#term-hashable From 63109eb1c4f4fdaa317c65a9f36537b5380e47ff Mon Sep 17 00:00:00 2001 From: BethanyG Date: Tue, 28 Mar 2023 16:27:48 -0700 Subject: [PATCH 2/2] Fixed lowercase kebab nit from configlet. --- exercises/practice/etl/.approaches/config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercises/practice/etl/.approaches/config.json b/exercises/practice/etl/.approaches/config.json index bd6bdfaeb1..a655952804 100644 --- a/exercises/practice/etl/.approaches/config.json +++ b/exercises/practice/etl/.approaches/config.json @@ -20,7 +20,7 @@ }, { "uuid": "c8de8d53-154c-4f05-ba44-44d8fcd739aa", - "slug": "dict-items ", + "slug": "dict-items", "title": "Iterate over Dictionary Items", "blurb": "Use dict.items() for iteration.", "authors": ["BethanyG"]