From c4a3a3d05495f2153e03eb67149ca9f98b0abae0 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 21 Nov 2023 18:04:39 -0500 Subject: [PATCH 1/9] first pass: unit test typing --- dev-requirements.txt | 4 ++-- .../unit_testing/test_unit_testing_types.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/functional/adapter/unit_testing/test_unit_testing_types.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 48598298b..4ebb849af 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@support-complex-types-unit-testing#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@support-complex-types-unit-testing#egg=dbt-tests-adapter&subdirectory=tests/adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor diff --git a/tests/functional/adapter/unit_testing/test_unit_testing_types.py b/tests/functional/adapter/unit_testing/test_unit_testing_types.py new file mode 100644 index 000000000..267ad1248 --- /dev/null +++ b/tests/functional/adapter/unit_testing/test_unit_testing_types.py @@ -0,0 +1,19 @@ +import pytest +from dbt.tests.adapter.unit_testing.test_unit_testing_types import BaseUnitTestingTypes + + +class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes): + @pytest.fixture + def data_types(self): + # sql_value, yaml_value + return [ + ["1", "1"], + ["'1'", "1"], + ["cast('true' as boolean)", "true"], + ["cast('2019-01-01' as date)", "2019-01-01"], + ["cast('2013-11-03 00:00:00-07' as TIMESTAMP)", "2013-11-03 00:00:00-07"], + ["cast(['a','b','c'] as array)", "['a','b','c']"], + ["cast([1,2,3] as array)", "[1,2,3]"], + ["cast(1 as NUMERIC)", "1"], + # ["""JSON '{"name": "Cooper", "forname": "Alice"}'""", """'{"name": "Cooper", "forname": "Alice"}'"""], #TODO + ] From f58fb4d9e0b3c63cddb67c89ccc17d4be1a4e663 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 4 Dec 2023 15:59:34 +0900 Subject: [PATCH 2/9] expand test coverage, update safe_cast --- .../bigquery/macros/utils/safe_cast.sql | 6 +++++ .../unit_testing/test_unit_testing_types.py | 23 ++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/dbt/include/bigquery/macros/utils/safe_cast.sql b/dbt/include/bigquery/macros/utils/safe_cast.sql index ac62bb050..57eaf7b1e 100644 --- a/dbt/include/bigquery/macros/utils/safe_cast.sql +++ b/dbt/include/bigquery/macros/utils/safe_cast.sql @@ -1,3 +1,9 @@ {% macro bigquery__safe_cast(field, type) %} +{%- if type.lower().startswith('array') and field is iterable and (field is not string and field is not mapping) and field | length > 0 -%} + (select array_agg(safe_cast(i as {{type.lower()[6:-1]}})) from unnest({{field}}) i) +{%- elif type.lower() == 'json' and field is mapping -%} + safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json) +{%- else -%} safe_cast({{field}} as {{type}}) +{%- endif -%} {% endmacro %} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing_types.py b/tests/functional/adapter/unit_testing/test_unit_testing_types.py index 267ad1248..4d3daaaec 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing_types.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing_types.py @@ -10,10 +10,27 @@ def data_types(self): ["1", "1"], ["'1'", "1"], ["cast('true' as boolean)", "true"], + ["1.0", "1.0"], + ["'string value'", "string value"], + ["cast(1.0 as numeric)", "1.0"], + ["cast(1 as bigint)", 1], ["cast('2019-01-01' as date)", "2019-01-01"], - ["cast('2013-11-03 00:00:00-07' as TIMESTAMP)", "2013-11-03 00:00:00-07"], + ["cast('2013-11-03 00:00:00-07' as timestamp)", "2013-11-03 00:00:00-07"], ["cast(['a','b','c'] as array)", "['a','b','c']"], ["cast([1,2,3] as array)", "[1,2,3]"], - ["cast(1 as NUMERIC)", "1"], - # ["""JSON '{"name": "Cooper", "forname": "Alice"}'""", """'{"name": "Cooper", "forname": "Alice"}'"""], #TODO + ["cast([true,true,false] as array)", "[true,true,false]"], + # array of date + ["[date '2019-01-01']", "['2020-01-01']"], + ["[date '2019-01-01']", "[]"], + ["[date '2019-01-01']", "null"], + # array of timestamp + ["[timestamp '2019-01-01']", "['2020-01-01']"], + ["[timestamp '2019-01-01']", "[]"], + ["[timestamp '2019-01-01']", "null"], + # json + [ + """json '{"name": "Cooper", "forname": "Alice"}'""", + """{"name": "Cooper", "forname": "Alice"}""", + ], + ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], ] From 35eb2f840b95e17f1233a4f3488adfe0e74d9f99 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 5 Feb 2024 17:41:16 -0500 Subject: [PATCH 3/9] TestBigQueryUnitTestCaseInsensitivity --- tests/functional/adapter/unit_testing/test_unit_testing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 8570b6842..9b1497b25 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -1,5 +1,6 @@ import pytest from dbt.tests.adapter.unit_testing.test_unit_testing import BaseUnitTestingTypes +from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes): @@ -34,3 +35,7 @@ def data_types(self): ], ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], ] + + +class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): + pass From f486f73346a90c30a348b1ff898d028b27a5021c Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 5 Feb 2024 17:46:19 -0500 Subject: [PATCH 4/9] changelog entry --- .changes/unreleased/Features-20240205-174614.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changes/unreleased/Features-20240205-174614.yaml diff --git a/.changes/unreleased/Features-20240205-174614.yaml b/.changes/unreleased/Features-20240205-174614.yaml new file mode 100644 index 000000000..299adbf46 --- /dev/null +++ b/.changes/unreleased/Features-20240205-174614.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Support non-STRUCT types for unit testing in dbt-bigquery, expand coverage of + safe_cast macro +time: 2024-02-05T17:46:14.505597-05:00 +custom: + Author: michelleark + Issue: "1090" From 1f2a06b5827e778b6b3d55a0134ae1fc3143b17f Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 5 Feb 2024 20:48:40 -0500 Subject: [PATCH 5/9] structs --- dbt/include/bigquery/macros/utils/safe_cast.sql | 2 ++ tests/functional/adapter/unit_testing/test_unit_testing.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/dbt/include/bigquery/macros/utils/safe_cast.sql b/dbt/include/bigquery/macros/utils/safe_cast.sql index 57eaf7b1e..be9f0c620 100644 --- a/dbt/include/bigquery/macros/utils/safe_cast.sql +++ b/dbt/include/bigquery/macros/utils/safe_cast.sql @@ -3,6 +3,8 @@ (select array_agg(safe_cast(i as {{type.lower()[6:-1]}})) from unnest({{field}}) i) {%- elif type.lower() == 'json' and field is mapping -%} safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json) +{%- elif type.lower().startswith('struct') and field is string -%} + safe_cast({{field.strip('"').strip("'")}} as {{type}}) {%- else -%} safe_cast({{field}} as {{type}}) {%- endif -%} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 9b1497b25..4bae77116 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -34,6 +34,12 @@ def data_types(self): """{"name": "Cooper", "forname": "Alice"}""", ], ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], + # structs + ["STRUCT('Isha' as name, 22 as age)", """'STRUCT("Isha" as name, 22 as age)'"""], + [ + "STRUCT('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)", + """'STRUCT("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""", + ], ] From 1c72bf101284d1b798a99d16afff864133db11fc Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 6 Feb 2024 17:28:18 -0500 Subject: [PATCH 6/9] structs of structs, arrays of structs, clean up safe_cast --- .../bigquery/macros/utils/safe_cast.sql | 20 +++++++++++++-- .../adapter/unit_testing/test_unit_testing.py | 25 ++++++++++++++++--- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/dbt/include/bigquery/macros/utils/safe_cast.sql b/dbt/include/bigquery/macros/utils/safe_cast.sql index be9f0c620..ec312af11 100644 --- a/dbt/include/bigquery/macros/utils/safe_cast.sql +++ b/dbt/include/bigquery/macros/utils/safe_cast.sql @@ -1,11 +1,27 @@ {% macro bigquery__safe_cast(field, type) %} {%- if type.lower().startswith('array') and field is iterable and (field is not string and field is not mapping) and field | length > 0 -%} - (select array_agg(safe_cast(i as {{type.lower()[6:-1]}})) from unnest({{field}}) i) + {#-- Extract nested type from 'array' --#} + {% set nested_type = type.lower()[6:-1] %} + {#-- BigQuery does not support direct casts to arrays. instead, each element must be cast individually + reaggregated into an array --#} + {%- if cast_from_string_unsupported_for(nested_type) %} + (select array_agg(safe_cast(i as {{ nested_type }})) from unnest([ + {%- for nested_field in field %} + {{ nested_field.strip('"').strip("'") }}{{ ',' if not loop.last }} + {%- endfor %} + ]) i) + {%- else -%} + (select array_agg(safe_cast(i as {{nested_type}})) from unnest({{field}}) i) + {%- endif -%} + {%- elif type.lower() == 'json' and field is mapping -%} safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json) -{%- elif type.lower().startswith('struct') and field is string -%} +{%- elif cast_from_string_unsupported_for(type) and field is string -%} safe_cast({{field.strip('"').strip("'")}} as {{type}}) {%- else -%} safe_cast({{field}} as {{type}}) {%- endif -%} {% endmacro %} + +{% macro cast_from_string_unsupported_for(type) %} + {{ return(type.lower().startswith('struct') or type.lower() == 'geography') }} +{% endmacro %} diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 4bae77116..2545ee295 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -1,6 +1,7 @@ import pytest -from dbt.tests.adapter.unit_testing.test_unit_testing import BaseUnitTestingTypes +from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity +from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes): @@ -17,6 +18,8 @@ def data_types(self): ["cast(1 as bigint)", 1], ["cast('2019-01-01' as date)", "2019-01-01"], ["cast('2013-11-03 00:00:00-07' as timestamp)", "2013-11-03 00:00:00-07"], + ["st_geogpoint(75, 45)", "'st_geogpoint(75, 45)'"], + # arrays ["cast(['a','b','c'] as array)", "['a','b','c']"], ["cast([1,2,3] as array)", "[1,2,3]"], ["cast([true,true,false] as array)", "[true,true,false]"], @@ -35,13 +38,27 @@ def data_types(self): ], ["""json '{"name": "Cooper", "forname": "Alice"}'""", "{}"], # structs - ["STRUCT('Isha' as name, 22 as age)", """'STRUCT("Isha" as name, 22 as age)'"""], + ["struct('Isha' as name, 22 as age)", """'struct("Isha" as name, 22 as age)'"""], [ - "STRUCT('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)", - """'STRUCT("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""", + "struct('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)", + """'struct("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""", + ], + # struct of struct + [ + "struct(struct(1 as id, 'blue' as color) as my_struct)", + """'struct(struct(1 as id, "blue" as color) as my_struct)'""", + ], + # array of struct + [ + "[struct(st_geogpoint(75, 45) as my_point), struct(st_geogpoint(75, 35) as my_point)]", + "['struct(st_geogpoint(75, 45) as my_point)', 'struct(st_geogpoint(75, 35) as my_point)']", ], ] class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): pass + + +class TestBigqueryeUnitTestInvalidInput(BaseUnitTestInvalidInput): + pass From 613189585e530209e3332fa40ea089bfd3a1c0a7 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 6 Feb 2024 17:57:09 -0500 Subject: [PATCH 7/9] typos --- tests/functional/adapter/unit_testing/test_unit_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/adapter/unit_testing/test_unit_testing.py b/tests/functional/adapter/unit_testing/test_unit_testing.py index 2545ee295..f4d4ef1e2 100644 --- a/tests/functional/adapter/unit_testing/test_unit_testing.py +++ b/tests/functional/adapter/unit_testing/test_unit_testing.py @@ -60,5 +60,5 @@ class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity): pass -class TestBigqueryeUnitTestInvalidInput(BaseUnitTestInvalidInput): +class TestBigQueryUnitTestInvalidInput(BaseUnitTestInvalidInput): pass From 72c6372e61a65c676589d158217dd9a9ff59f9b7 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Thu, 8 Feb 2024 22:25:39 -0500 Subject: [PATCH 8/9] restore dev-requirements --- dev-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 3d2064a85..d630d5175 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,8 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons -git+https://github.com/dbt-labs/dbt-adapters.git@unit-testing-case-insensitive-comparisons#subdirectory=dbt-tests-adapter +git+https://github.com/dbt-labs/dbt-adapters.git +git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor black~=23.12 From f8d9f672bf15af2e0ea6c3904a1481b75421f3cf Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 9 Feb 2024 10:40:04 -0500 Subject: [PATCH 9/9] changelog entry --- .changes/unreleased/Features-20240205-174614.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/unreleased/Features-20240205-174614.yaml b/.changes/unreleased/Features-20240205-174614.yaml index 299adbf46..192273d3d 100644 --- a/.changes/unreleased/Features-20240205-174614.yaml +++ b/.changes/unreleased/Features-20240205-174614.yaml @@ -1,5 +1,5 @@ kind: Features -body: Support non-STRUCT types for unit testing in dbt-bigquery, expand coverage of +body: Support all types for unit testing in dbt-bigquery, expand coverage of safe_cast macro time: 2024-02-05T17:46:14.505597-05:00 custom: