Skip to content

Commit

Permalink
Merge pull request #218 from ScalefreeCOM/dev
Browse files Browse the repository at this point in the history
Pulling in the latest dev-branch
  • Loading branch information
tkirschke authored Aug 26, 2024
2 parents fa7c330 + b78dcf7 commit 33d1a3c
Show file tree
Hide file tree
Showing 67 changed files with 982 additions and 630 deletions.
17 changes: 9 additions & 8 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,18 @@ vars:
#Ghost Record Configuration
datavault4dbt.beginning_of_all_times: {"bigquery":"0001-01-01T00-00-01","snowflake":"0001-01-01T00:00:01", "exasol": "0001-01-01 00:00:01", "postgres": "0001-01-01 00:00:01", "redshift": "0001-01-01 00:00:01", "synapse": "1901-01-01T00:00:01"}
datavault4dbt.end_of_all_times: {"bigquery":"8888-12-31T23-59-59","snowflake":"8888-12-31T23:59:59", "exasol": "8888-12-31 23:59:59", "postgres": "8888-12-31 23:59:59", "redshift": "8888-12-31 23:59:59", "synapse": "8888-12-31T23:59:59"}
datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "%Y-%m-%dT%H-%M-%S", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126}
datavault4dbt.timestamp_format: {"bigquery":"%Y-%m-%dT%H-%M-%S","snowflake":"YYYY-MM-DDTHH24:MI:SS", "exasol": "YYYY-mm-dd HH:MI:SS", "postgres": "YYYY-MM-DD HH24:MI:SS", "redshift": "YYYY-MM-DD HH24:MI:SS", "synapse": 126}

datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "%Y-%m-%d", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}
#datavault4dbt.beginning_of_all_times_date: {"bigquery":"0001-01-01","snowflake":"0001-01-01", "exasol": "0001-01-01", "postgres": "0001-01-01", "redshift": "0001-01-01", "synapse": "1901-01-01"}
#datavault4dbt.end_of_all_times_date: {"bigquery":"8888-12-31","snowflake":"8888-12-31", "exasol": "8888-12-31", "postgres": "8888-12-31", "redshift": "8888-12-31", "synapse": "8888-12-31"}
#datavault4dbt.date_format: {"bigquery":"%Y-%m-%d","snowflake":"YYYY-MM-DD", "exasol": "YYYY-mm-dd", "postgres": "YYYY-MM-DD", "redshift": "YYYY-MM-DD", "synapse": "yyyy-MM-dd"}

datavault4dbt.default_unknown_rsrc: 'SYSTEM'
datavault4dbt.default_error_rsrc: 'ERROR'
datavault4dbt.rsrc_default_dtype: 'VARCHAR(255)'
datavault4dbt.stg_default_dtype: 'VARCHAR(255)'
datavault4dbt.derived_columns_default_dtype: 'VARCHAR(255)'
datavault4dbt.rsrc_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}
datavault4dbt.timestamp_default_dtype: {"bigquery":"TIMESTAMP","snowflake":"TIMESTAMP_TZ", "exasol": "TIMESTAMP(3) WITH LOCAL TIME ZONE", "postgres": "TIMESTAMPTZ", "redshift": "TIMESTAMPTZ", "synapse": "datetimeoffset"}
datavault4dbt.stg_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}
datavault4dbt.derived_columns_default_dtype: {"bigquery":"STRING","snowflake":"VARCHAR", "exasol": "VARCHAR (2000000) UTF8", "postgres": "VARCHAR", "redshift": "VARCHAR", "synapse": "VARCHAR"}

#Datatype specific default values
datavault4dbt.error_value__STRING: '(error)'
Expand All @@ -81,4 +82,4 @@ models:
+materialized: view
raw_vault:
+schema: <name of your RDV schema>
+materialized: table
+materialized: table
2 changes: 1 addition & 1 deletion macros/internal/helpers/stage_processing_macros.sql
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,4 @@
{%- endif %}
{%- endfor -%}

{%- endmacro -%}
{%- endmacro -%}
24 changes: 23 additions & 1 deletion macros/internal/metadata_processing/escape_column_names.sql
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,30 @@
{%- set escape_char_left = var('escape_char_left', "") -%}
{%- set escape_char_right = var('escape_char_right', "") -%}

{%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}

{%- do return(escaped_column_name) -%}

{%- endmacro -%}

{%- macro redshift__escape_column_name(column) -%}

{%- set escape_char_left = var('escape_char_left', '"') -%}
{%- set escape_char_right = var('escape_char_right', '"') -%}

{%- set escaped_column_name = escape_char_left ~ column | lower | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}

{%- do return(escaped_column_name) -%}

{%- endmacro -%}

{%- macro exasol__escape_column_name(column) -%}

{%- set escape_char_left = var('escape_char_left', '') -%}
{%- set escape_char_right = var('escape_char_right', '') -%}

{%- set escaped_column_name = escape_char_left ~ column | upper | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right | indent(4) -%}

{%- do return(escaped_column_name) -%}

{%- endmacro -%}
{%- endmacro -%}
19 changes: 14 additions & 5 deletions macros/staging/bigquery/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@
{% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %}

{# Setting the rsrc default datatype and length #}
{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'STRING') %}
{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %}

{# Setting the ldts default datatype #}
{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %}

WITH

Expand Down Expand Up @@ -205,7 +208,7 @@ source_data AS (
ldts_rsrc_data AS (

SELECT
{{ ldts }} AS {{ load_datetime_col_name}},
CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }},
CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }}
{%- if datavault4dbt.is_something(sequence) %},
{{ sequence }} AS edwSequence
Expand All @@ -224,7 +227,7 @@ ldts_rsrc_data AS (

{%- set columns_without_excluded_columns_tmp = [] -%}
{%- for column in columns_without_excluded_columns -%}
{%- if column.name not in derived_column_names -%}
{%- if column.name | lower not in derived_column_names | map('lower') -%}
{%- do columns_without_excluded_columns_tmp.append(column) -%}
{%- endif -%}
{%- endfor -%}
Expand Down Expand Up @@ -369,7 +372,7 @@ ma_hashdiff_prep AS (
SELECT

{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%}

{{ multi_active_config['main_hashkey_column'] }},
{# Generates only all hashdiffs. #}
{{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }},
Expand All @@ -378,6 +381,10 @@ ma_hashdiff_prep AS (
FROM main_hashkey_generation
GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}


{% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records dont get created #}


),

hashed_columns AS (
Expand All @@ -389,9 +396,11 @@ hashed_columns AS (
{# Generates only all remaining hashkeys, that are no hashdiffs #}

{%- if datavault4dbt.is_something(processed_remaining_hash_columns) %}
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #}
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }},
{%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #}
{% endif -%}


{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #}
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}

Expand Down
2 changes: 1 addition & 1 deletion macros/staging/derived_column_datatypes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
{%- if not datavault4dbt.is_attribute(column_value) -%}
{# If the value is a static value, it is not an attribute and no datatype needs to be detected. Instead a default datatype is applied. #}

{%- set datatype = var('datavault4dbt.derived_columns_default_dtype', 'STRING') -%}
{%- set datatype = datavault4dbt.string_default_dtype(type='derived_columns') -%}
{%- set value = column_value -%}
{%- set col_size = "" -%}

Expand Down
45 changes: 33 additions & 12 deletions macros/staging/exasol/stage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@
{% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %}

{# Setting the rsrc default datatype and length #}
{% set rsrc_default_dtype = var('datavault4dbt.rsrc_default_dtype', 'VARCHAR (2000000) UTF8') %}
{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %}

{# Setting the ldts default datatype #}
{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %}

WITH

Expand All @@ -194,7 +197,7 @@ source_data AS (
ldts_rsrc_data AS (

SELECT
{{ ldts }} AS {{ load_datetime_col_name}},
CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }},
CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }}
{%- if datavault4dbt.is_something(sequence) %},
{{ sequence }} AS edwSequence
Expand All @@ -209,6 +212,14 @@ ldts_rsrc_data AS (

{%- set last_cte = "ldts_rsrc_data" -%}
{%- set final_columns_to_select = alias_columns + final_columns_to_select %}

{%- set columns_without_excluded_columns_tmp = [] -%}
{%- for column in columns_without_excluded_columns -%}
{%- if column.name | lower not in derived_column_names | map('lower') -%}
{%- do columns_without_excluded_columns_tmp.append(column) -%}
{%- endif -%}
{%- endfor -%}
{%- set columns_without_excluded_columns = columns_without_excluded_columns_tmp |list -%}
),

{%- if datavault4dbt.is_something(missing_columns) %}
Expand Down Expand Up @@ -302,6 +313,8 @@ prejoined_columns AS (
{# Adding derived columns to the selection #}
derived_columns AS (

{%- set final_columns_to_select = datavault4dbt.process_columns_to_select(final_columns_to_select, derived_column_names) -%}

SELECT
{% if final_columns_to_select | length > 0 -%}
{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }},
Expand All @@ -318,15 +331,16 @@ derived_columns AS (
{# Generating Hashed Columns (hashkeys and hashdiffs for Hubs/Links/Satellites) #}
{% if datavault4dbt.is_something(multi_active_config) %}

{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[]) -%}
{%- set tmp_ns = namespace(main_hashkey_dict={}, remaining_hashed_columns={}, hashdiff_names=[], hashdiff_dict={}) -%}

{%- for column in hashed_columns.keys() -%}
{%- if column | lower == multi_active_config['main_hashkey_column']| lower and not hashed_columns[column].is_hashdiff -%}
{%- if column == multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%}
{%- do tmp_ns.main_hashkey_dict.update({column: hashed_columns[column]}) -%}
{% elif column != multi_active_config['main_hashkey_column'] and not hashed_columns[column].is_hashdiff -%}
{%- do tmp_ns.remaining_hashed_columns.update({column: hashed_columns[column]}) -%}
{%- elif hashed_columns[column].is_hashdiff -%}
{%- do tmp_ns.hashdiff_names.append(column) -%}
{%- do tmp_ns.hashdiff_dict.update({column: hashed_columns[column]}) -%}
{%- endif -%}
{%- endfor -%}

Expand All @@ -345,31 +359,38 @@ ma_hashdiff_prep AS (

SELECT

{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(hashed_columns) -%}
{% set processed_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.hashdiff_dict) -%}

{{ multi_active_config['main_hashkey_column'] }},
{# Generates only all hashdiffs. #}
{{- datavault4dbt.hash_columns(columns=processed_hash_columns, multi_active_key=multi_active_config['multi_active_key'], main_hashkey_column=multi_active_config['main_hashkey_column']) | indent(4) }},
{{ ldts_alias }}

FROM {{ last_cte }}
GROUP BY local.{{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}
FROM main_hashkey_generation
GROUP BY {{ multi_active_config['main_hashkey_column'] }}, {{ ldts_alias }}


{% do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.main_hashkey_dict)) -%} {# Add main hashkey to list of processed columns, otherwise ghost records dont get created #}


),

hashed_columns AS (

SELECT

{{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. #}
{{ datavault4dbt.alias_all(columns=final_columns_to_select, prefix='main_hashkey_generation') }}, {# Everything from last_cte before hashed_columns. #}
{% set processed_remaining_hash_columns = datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns) -%}
{# Generates only all remaining hashkeys, that are no hashdiffs #}

{%- if datavault4dbt.is_something(processed_remaining_hash_columns) %}
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }}, {# All remaining hashed_columns get calculated. #}
{{- datavault4dbt.hash_columns(columns=processed_remaining_hash_columns) | indent(4) }},
{%- do processed_hash_columns.update(datavault4dbt.process_hash_column_excludes(tmp_ns.remaining_hashed_columns)) -%} {# All remaining hashed_columns get calculated. #}
{% endif -%}


{{ datavault4dbt.print_list(datavault4dbt.escape_column_names(tmp_ns.hashdiff_names)) }}, {# All MA Hashdiffs are selected. #}
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}
main_hashkey_generation.{{ multi_active_config['main_hashkey_column'] }} {# Main Hashkey selected. #}

FROM main_hashkey_generation
LEFT JOIN ma_hashdiff_prep
Expand Down Expand Up @@ -461,7 +482,7 @@ unknown_values AS (
{%- if datavault4dbt.is_something(processed_hash_columns) -%},

{%- for hash_column in processed_hash_columns %}
CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as "{{ hash_column }}"
CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }}
{%- if not loop.last %},{% endif %}
{%- endfor -%}

Expand Down Expand Up @@ -528,7 +549,7 @@ error_values AS (
{%- if datavault4dbt.is_something(processed_hash_columns) -%},

{%- for hash_column in processed_hash_columns %}
CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as "{{ hash_column }}"
CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }}
{%- if not loop.last %},{% endif %}
{%- endfor -%}

Expand Down
22 changes: 12 additions & 10 deletions macros/staging/hash_columns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@

{%- endmacro -%}


{%- macro redshift__hash_columns(columns, multi_active_key, main_hashkey_column) -%}
{%- if columns is mapping and columns is not none -%}

Expand All @@ -89,33 +90,34 @@
{%- endif -%}


{%- else -%}

{%- else -%}
{% if columns[col] is mapping and columns[col].is_hashdiff -%}
{%- if columns[col].use_rtrim -%}
{%- set rtrim_hashdiff = true -%}
{%- else -%}
{%- set rtrim_hashdiff = false -%}
{%- endif -%}

{{- datavault4dbt.hash(columns=columns[col]['columns'],
alias=col,
is_hashdiff=columns[col]['is_hashdiff'],
rtrim_hashdiff=rtrim_hashdiff) -}}
is_hashdiff=columns[col]['is_hashdiff']) -}}

{%- elif columns[col] is not mapping -%}

{{- datavault4dbt.hash(columns=columns[col],
alias=col,
is_hashdiff=false) -}}

{%- elif columns[col] is mapping and not columns[col].is_hashdiff -%}

{%- if execute -%}
{%- do exceptions.warn("[" ~ this ~ "] Warning: You provided a list of columns under a 'columns' key, but did not provide the 'is_hashdiff' flag. Use list syntax for PKs.") -%}
{% endif %}

{{- datavault4dbt.hash(columns=columns[col]['columns'], alias=col) -}}

{%- endif -%}

{{- ",\n" if not loop.last -}}

{%- endif -%}

{%- endfor -%}

{%- endif %}
{%- endmacro -%}

Loading

0 comments on commit 33d1a3c

Please sign in to comment.