From f6e26d0073fbf3830457532f57b3b59e60af0535 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Tue, 31 Oct 2023 00:41:17 -0400
Subject: [PATCH] ADAP-972: Fix issue where materialized views were being
 mapped as tables in catalog queries (#996)

* changelog
* add test demonstrating issue
* update catalog query to correctly identify materialized views
---
Note: this patch text was recovered from a copy whose line breaks had been
collapsed into spaces. Line boundaries (including blank context lines) were
rebuilt and checked against the hunk headers and the diffstat; leading
indentation is a best-effort reconstruction. If a context line fails to
match, apply with `git apply --ignore-whitespace`.

 .../unreleased/Fixes-20231030-222134.yaml     |  6 +++
 dbt/include/bigquery/macros/catalog.sql       | 40 +++++++++++------
 .../functional/adapter/catalog_tests/files.py | 33 ++++++++++++++
 .../catalog_tests/test_relation_types.py      | 44 +++++++++++++++++++
 4 files changed, 109 insertions(+), 14 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20231030-222134.yaml
 create mode 100644 tests/functional/adapter/catalog_tests/files.py
 create mode 100644 tests/functional/adapter/catalog_tests/test_relation_types.py

diff --git a/.changes/unreleased/Fixes-20231030-222134.yaml b/.changes/unreleased/Fixes-20231030-222134.yaml
new file mode 100644
index 000000000..62bfc5f27
--- /dev/null
+++ b/.changes/unreleased/Fixes-20231030-222134.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Assign the correct relation type to materialized views in catalog queries
+time: 2023-10-30T22:21:34.401675-04:00
+custom:
+  Author: mikealfare
+  Issue: "995"
diff --git a/dbt/include/bigquery/macros/catalog.sql b/dbt/include/bigquery/macros/catalog.sql
index d430b80f8..25166c7b4 100644
--- a/dbt/include/bigquery/macros/catalog.sql
+++ b/dbt/include/bigquery/macros/catalog.sql
@@ -7,30 +7,42 @@
   {%- else -%}
 
   {%- set query -%}
-    with tables as (
+    with materialized_views as (
         select
-            project_id as table_database,
-            dataset_id as table_schema,
-            table_id as original_table_name,
+            table_catalog as project_id,
+            table_schema as dataset_id,
+            table_name as table_id
+        from {{ information_schema.replace(information_schema_view='MATERIALIZED_VIEWS') }}
+    ),
+    tables as (
+        select
+            tables.project_id as table_database,
+            tables.dataset_id as table_schema,
+            tables.table_id as original_table_name,
 
-            concat(project_id, '.', dataset_id, '.', table_id) as relation_id,
+            concat(tables.project_id, '.', tables.dataset_id, '.', tables.table_id) as relation_id,
 
-            row_count,
-            size_bytes as size_bytes,
+            tables.row_count,
+            tables.size_bytes as size_bytes,
             case
-                when type = 1 then 'table'
-                when type = 2 then 'view'
+                when materialized_views.table_id is not null then 'materialized view'
+                when tables.type = 1 then 'table'
+                when tables.type = 2 then 'view'
                 else 'external'
             end as table_type,
 
-            REGEXP_CONTAINS(table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard,
-            REGEXP_EXTRACT(table_id, '^(.+)[0-9]{8}$') as shard_base_name,
-            REGEXP_EXTRACT(table_id, '^.+([0-9]{8})$') as shard_name
+            REGEXP_CONTAINS(tables.table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard,
+            REGEXP_EXTRACT(tables.table_id, '^(.+)[0-9]{8}$') as shard_base_name,
+            REGEXP_EXTRACT(tables.table_id, '^.+([0-9]{8})$') as shard_name
 
-        from {{ information_schema.replace(information_schema_view='__TABLES__') }}
+        from {{ information_schema.replace(information_schema_view='__TABLES__') }} tables
+        left join materialized_views
+            on materialized_views.project_id = tables.project_id
+            and materialized_views.dataset_id = tables.dataset_id
+            and materialized_views.table_id = tables.table_id
         where (
           {%- for schema in schemas -%}
-            upper(dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+            upper(tables.dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
           {%- endfor -%}
         )
     ),
diff --git a/tests/functional/adapter/catalog_tests/files.py b/tests/functional/adapter/catalog_tests/files.py
new file mode 100644
index 000000000..595517bf2
--- /dev/null
+++ b/tests/functional/adapter/catalog_tests/files.py
@@ -0,0 +1,33 @@
+MY_SEED = """
+id,value,record_valid_date
+1,100,2023-01-01 00:00:00
+2,200,2023-01-02 00:00:00
+3,300,2023-01-02 00:00:00
+""".strip()
+
+
+MY_TABLE = """
+{{ config(
+    materialized='table',
+) }}
+select *
+from {{ ref('my_seed') }}
+"""
+
+
+MY_VIEW = """
+{{ config(
+    materialized='view',
+) }}
+select *
+from {{ ref('my_seed') }}
+"""
+
+
+MY_MATERIALIZED_VIEW = """
+{{ config(
+    materialized='materialized_view',
+) }}
+select *
+from {{ ref('my_table') }}
+"""
diff --git a/tests/functional/adapter/catalog_tests/test_relation_types.py b/tests/functional/adapter/catalog_tests/test_relation_types.py
new file mode 100644
index 000000000..96beb69c9
--- /dev/null
+++ b/tests/functional/adapter/catalog_tests/test_relation_types.py
@@ -0,0 +1,44 @@
+from dbt.contracts.results import CatalogArtifact
+from dbt.tests.util import run_dbt
+import pytest
+
+from tests.functional.adapter.catalog_tests import files
+
+
+class TestCatalogRelationTypes:
+    @pytest.fixture(scope="class", autouse=True)
+    def seeds(self):
+        return {"my_seed.csv": files.MY_SEED}
+
+    @pytest.fixture(scope="class", autouse=True)
+    def models(self):
+        yield {
+            "my_table.sql": files.MY_TABLE,
+            "my_view.sql": files.MY_VIEW,
+            "my_materialized_view.sql": files.MY_MATERIALIZED_VIEW,
+        }
+
+    @pytest.fixture(scope="class", autouse=True)
+    def docs(self, project):
+        run_dbt(["seed"])
+        run_dbt(["run"])
+        yield run_dbt(["docs", "generate"])
+
+    @pytest.mark.parametrize(
+        "node_name,relation_type",
+        [
+            ("seed.test.my_seed", "table"),
+            ("model.test.my_table", "table"),
+            ("model.test.my_view", "view"),
+            ("model.test.my_materialized_view", "materialized view"),
+        ],
+    )
+    def test_relation_types_populate_correctly(
+        self, docs: CatalogArtifact, node_name: str, relation_type: str
+    ):
+        """
+        This test addresses: https://github.com/dbt-labs/dbt-bigquery/issues/995
+        """
+        assert node_name in docs.nodes
+        node = docs.nodes[node_name]
+        assert node.metadata.type == relation_type