Skip to content

Commit

Permalink
Merge branch 'main' into add-ghost-record-numeric-values-variables
Browse files Browse the repository at this point in the history
  • Loading branch information
tkiehn authored Sep 5, 2024
2 parents f24ebe5 + 846a926 commit b40a8c8
Show file tree
Hide file tree
Showing 19 changed files with 121 additions and 74 deletions.
Empty file removed analyses/.gitkeep
Empty file.
16 changes: 4 additions & 12 deletions dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
name: datavault4dbt
version: 1.0.0
version: 1.4.0
require-dbt-version: [">=1.0.0", "<2.0.0"]
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'datavault4dbt'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
Expand Down Expand Up @@ -50,9 +42,9 @@ vars:
datavault4dbt.end_of_all_times: {"bigquery":"8888-12-31T23-59-59","snowflake":"8888-12-31T23:59:59", "exasol": "8888-12-31 23:59:59", "postgres": "8888-12-31 23:59:59", "redshift": "8888-12-31 23:59:59", "synapse": "8888-12-31T23:59:59"}
datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "YYYY-MM-DD HH24:MI:SS", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126}

#datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
#datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
#datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "YYYY-MM-DD", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}
datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "YYYY-MM-DD", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}

datavault4dbt.default_unknown_rsrc: 'SYSTEM'
datavault4dbt.default_error_rsrc: 'ERROR'
Expand Down
11 changes: 8 additions & 3 deletions macros/staging/bigquery/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
sequence,
prejoined_columns,
missing_columns,
multi_active_config) -%}
multi_active_config,
enable_ghost_records) -%}

{% if (source_model is none) and execute %}

Expand Down Expand Up @@ -147,7 +148,7 @@

{%- set source_columns_to_select = only_include_from_source -%}

{%- endif-%}
{%- endif -%}

{%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%}
{%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%}
Expand Down Expand Up @@ -437,6 +438,7 @@ hashed_columns AS (
{%- endif -%}
{%- endif -%}

{%- if enable_ghost_records and not is_incremental() %}
{# Creating Ghost Record for unknown case, based on datatype #}
unknown_values AS (

Expand Down Expand Up @@ -580,6 +582,7 @@ ghost_records AS (
UNION ALL
SELECT * FROM error_values
),
{%- endif -%}

{%- if not include_source_columns -%}
{% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %}
Expand All @@ -594,13 +597,15 @@ columns_to_select AS (

FROM {{ last_cte }}

{%- if enable_ghost_records and not is_incremental() %}
UNION ALL

SELECT

{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM ghost_records
{%- endif %}
)

SELECT * FROM columns_to_select
Expand Down
12 changes: 10 additions & 2 deletions macros/staging/exasol/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
sequence,
prejoined_columns,
missing_columns,
multi_active_config) -%}
multi_active_config,
enable_ghost_records) -%}

{% if (source_model is none) and execute %}

Expand Down Expand Up @@ -142,7 +143,7 @@

{%- set source_columns_to_select = only_include_from_source -%}

{%- endif-%}
{%- endif -%}

{%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%}
{%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%}
Expand Down Expand Up @@ -425,6 +426,7 @@ hashed_columns AS (
{%- endif -%}
{%- endif -%}

{%- if enable_ghost_records and not is_incremental() %}
{# Creating Ghost Record for unknown case, based on datatype #}
unknown_values AS (
SELECT
Expand Down Expand Up @@ -563,9 +565,12 @@ ghost_records AS (
UNION ALL
SELECT * FROM error_values
),
{%- endif -%}

{%- if not include_source_columns -%}
{% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %}
{%- endif -%}

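{# Combining the regular data with the two ghost records; the ghost records are only appended when enable_ghost_records is true and the run is not incremental #}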
columns_to_select AS (

Expand All @@ -574,12 +579,15 @@ columns_to_select AS (
{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM {{ last_cte }}

{%- if enable_ghost_records and not is_incremental() %}
UNION ALL
SELECT

{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM ghost_records
{%- endif -%}
)

SELECT * FROM columns_to_select
Expand Down
9 changes: 7 additions & 2 deletions macros/staging/postgres/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
sequence,
prejoined_columns,
missing_columns,
multi_active_config) -%}
multi_active_config,
enable_ghost_records) -%}

{% if (source_model is none) and execute %}

Expand Down Expand Up @@ -148,7 +149,7 @@

{%- set source_columns_to_select = only_include_from_source -%}

{%- endif-%}
{%- endif -%}

{%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%}
{%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%}
Expand Down Expand Up @@ -437,6 +438,7 @@ hashed_columns AS (
{%- endif -%}
{%- endif -%}

{%- if enable_ghost_records and not is_incremental() %}
{# Creating Ghost Record for unknown case, based on datatype #}
unknown_values AS (

Expand Down Expand Up @@ -580,6 +582,7 @@ ghost_records AS (
UNION ALL
SELECT * FROM error_values
),
{%- endif -%}

{%- if not include_source_columns -%}
{% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %}
Expand All @@ -594,13 +597,15 @@ columns_to_select AS (

FROM {{ last_cte }}

{%- if enable_ghost_records and not is_incremental() %}
UNION ALL

SELECT

{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM ghost_records
{% endif %}
)

SELECT * FROM columns_to_select
Expand Down
27 changes: 25 additions & 2 deletions macros/staging/redshift/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
sequence,
prejoined_columns,
missing_columns,
multi_active_config) -%}
multi_active_config,
enable_ghost_records) -%}

{% if (source_model is none) and execute %}

Expand Down Expand Up @@ -147,7 +148,7 @@

{%- set source_columns_to_select = only_include_from_source -%}

{%- endif-%}
{%- endif -%}

{%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%}
{%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%}
Expand Down Expand Up @@ -338,6 +339,24 @@ derived_columns AS (
),
{%- endif -%}

{# Checking the data type of each column used in hashed_columns, to enable trim functions on byte datatypes such as super / geometry / boolean #}

{%- if execute -%}

{%- if datavault4dbt.is_something(derived_columns) %}
{%- set derived_columns_dict = derived_columns_with_datatypes_DICT -%}
{%- else -%}
{%- set derived_columns_dict = [] -%}
{%- endif -%}
{%- for hash_column_key in hashed_columns.keys() -%}
{%- if hashed_columns[hash_column_key] is mapping -%}
{%- do hashed_columns[hash_column_key].update({'columns': datavault4dbt.get_field_hash_by_datatype(hashed_columns=hashed_columns[hash_column_key]['columns'], all_datatype_columns=all_columns, derived_columns=derived_columns_dict)}) -%}
{%- elif datavault4dbt.is_list(hashed_columns[hash_column_key]) -%}
{%- do hashed_columns.update({hash_column_key: datavault4dbt.get_field_hash_by_datatype(hashed_columns=hashed_columns[hash_column_key], all_datatype_columns=all_columns, derived_columns=derived_columns_dict)}) -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}

{%- if datavault4dbt.is_something(hashed_columns) and hashed_columns is mapping %}
{# Generating Hashed Columns (hashkeys and hashdiffs for Hubs/Links/Satellites) #}
{% if datavault4dbt.is_something(multi_active_config) %}
Expand Down Expand Up @@ -428,6 +447,7 @@ hashed_columns AS (
{%- endif -%}
{%- endif -%}

{%- if enable_ghost_records and not is_incremental() %}
{# Creating Ghost Record for unknown case, based on datatype #}
unknown_values AS (

Expand Down Expand Up @@ -571,6 +591,7 @@ ghost_records AS (
UNION ALL
SELECT * FROM error_values
),
{%- endif -%}

{%- if not include_source_columns -%}
{% set final_columns_to_select = datavault4dbt.process_columns_to_select(columns_list=final_columns_to_select, exclude_columns_list=source_columns_to_select) %}
Expand All @@ -585,13 +606,15 @@ columns_to_select AS (

FROM {{ last_cte }}

{%- if enable_ghost_records and not is_incremental() %}
UNION ALL

SELECT

{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM ghost_records
{%- endif -%}
)

SELECT * FROM columns_to_select
Expand Down
11 changes: 6 additions & 5 deletions macros/staging/snowflake/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
sequence,
prejoined_columns,
missing_columns,
multi_active_config) -%}
multi_active_config,
enable_ghost_records) -%}

{% if (source_model is none) and execute %}

Expand Down Expand Up @@ -151,7 +152,7 @@
{%- set source_columns_to_select = only_include_from_source -%}
{{ log('source_columns_to_select when include_source_columns=false: '~ source_columns_to_select, false) }}

{%- endif-%}
{%- endif -%}

{%- set final_columns_to_select = final_columns_to_select + source_columns_to_select -%}
{%- set derived_columns_to_select = datavault4dbt.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%}
Expand Down Expand Up @@ -388,7 +389,7 @@ hashed_columns AS (
{%- endif -%}
{%- endif -%}

{% if not is_incremental() %}
{%- if enable_ghost_records and not is_incremental() %}
{# Creating Ghost Record for unknown case, based on datatype #}
unknown_values AS (

Expand Down Expand Up @@ -548,15 +549,15 @@ columns_to_select AS (

FROM {{ last_cte }}

{% if not is_incremental() %}
{%- if enable_ghost_records and not is_incremental() %}
UNION ALL

SELECT

{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }}

FROM ghost_records
{% endif %}
{% endif %}
)

SELECT * FROM columns_to_select
Expand Down
17 changes: 15 additions & 2 deletions macros/staging/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,29 @@
{'multi_active_key': ['phonetype', 'company'], This source data comes with two multi-active keys. The combination of those two, the main_hashkey and ldts is unique
'main_hashkey_column': 'hk_contact_h'} inside the source system.

enable_ghost_records::boolean If set to true, the stage is created with ghost records. Optional parameter; defaults to true (ghost records enabled).

#}



{%- macro stage(ldts, rsrc, source_model, include_source_columns=true, hashed_columns=none, derived_columns=none, sequence=none, prejoined_columns=none, missing_columns=none, multi_active_config=none) -%}
{%- macro stage(ldts, rsrc, source_model, include_source_columns=true, hashed_columns=none, derived_columns=none, sequence=none, prejoined_columns=none, missing_columns=none, multi_active_config=none, enable_ghost_records=true) -%}

{# If include_source_columns is passed but is empty (none or ""), it falls back to the default value (true) #}
{%- if include_source_columns is none or include_source_columns == "" -%}
{%- set include_source_columns = true -%}
{%- endif -%}

{# If enable_ghost_records is passed but is empty (none or ""), it falls back to the default value (true) #}
{%- if enable_ghost_records is none or enable_ghost_records == "" -%}
{%- set enable_ghost_records = true -%}
{%- endif -%}

{# If ldts is empty replace it with the current timestamp #}
{%- if datavault4dbt.is_nothing(ldts) -%}
{%- set ldts = datavault4dbt.current_timestamp() -%}
{%- endif -%}

{{- adapter.dispatch('stage', 'datavault4dbt')(include_source_columns=include_source_columns,
ldts=ldts,
rsrc=rsrc,
Expand All @@ -118,6 +130,7 @@
sequence=sequence,
prejoined_columns=prejoined_columns,
missing_columns=missing_columns,
multi_active_config=multi_active_config) -}}
multi_active_config=multi_active_config,
enable_ghost_records=enable_ghost_records) -}}

{%- endmacro -%}
Loading

0 comments on commit b40a8c8

Please sign in to comment.