diff --git a/macros/staging/bigquery/stage.sql b/macros/staging/bigquery/stage.sql index 8c94c387..8e9b1d47 100644 --- a/macros/staging/bigquery/stage.sql +++ b/macros/staging/bigquery/stage.sql @@ -96,7 +96,7 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} @@ -183,6 +183,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. 
#} @@ -256,6 +258,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -263,19 +266,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. 
Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -296,28 +323,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -444,65 +468,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% 
endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? 
yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + {# aliases are optional (the prejoined_columns SELECT guards them with is_something); fall back to the extract_columns name so the ghost record column matches the unaliased SELECT output #} ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ 
datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -514,62 +534,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in 
prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + {# aliases are optional (the prejoined_columns SELECT guards them with is_something); fall back to the extract_columns name so the ghost record column matches the unaliased SELECT output #} ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} diff --git a/macros/staging/databricks/stage.sql b/macros/staging/databricks/stage.sql index fc76044b..e7bd16f3 100644 --- a/macros/staging/databricks/stage.sql +++ b/macros/staging/databricks/stage.sql @@ -96,7 +96,7 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = 
datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} @@ -177,8 +177,13 @@ {% set error_value_rsrc = var('datavault4dbt.default_error_rsrc', 'ERROR') %} {% set unknown_value_rsrc = var('datavault4dbt.default_unknown_rsrc', 'SYSTEM') %} -{# Setting the rsrc default datatype #} -{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type=rsrc) %} +{# Setting the rsrc default datatype and length #} +{% set rsrc_default_dtype = datavault4dbt.string_default_dtype(type='rsrc') %} + +{# Setting the ldts default datatype #} +{% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} + +{{ datavault4dbt.prepend_generated_by() }} WITH @@ -206,7 +211,7 @@ source_data AS ( ldts_rsrc_data AS ( SELECT - {{ ldts }} AS {{ load_datetime_col_name}}, + CAST( {{ ldts }} as {{ ldts_default_dtype }} ) AS {{ load_datetime_col_name }}, CAST( {{ rsrc }} as {{ rsrc_default_dtype }} ) AS {{ record_source_col_name }} {%- if datavault4dbt.is_something(sequence) %}, {{ sequence }} AS edwSequence @@ -253,6 +258,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -260,19 +266,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic 
checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() 
-%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -293,28 +323,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. #} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -439,65 +466,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ 
record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + 
{%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + {# aliases are optional (the prejoined_columns SELECT guards them with is_something); fall back to the extract_columns name so the ghost record column matches the unaliased SELECT output #} ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} 
- {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -509,62 +532,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if 
datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + {# aliases are optional (the prejoined_columns SELECT guards them with is_something); fall back to the extract_columns name so the ghost record column matches the unaliased SELECT output #} ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} diff --git a/macros/staging/exasol/stage.sql b/macros/staging/exasol/stage.sql index 50dd35d3..a654057c 100644 --- a/macros/staging/exasol/stage.sql +++ b/macros/staging/exasol/stage.sql @@ -178,6 +178,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. 
#} @@ -188,6 +190,12 @@ source_data AS ( FROM {{ source_relation }} + {% if is_incremental() %} {# COALESCE: max() is NULL when the target holds no rows other than the end-of-all-times ghost record, and a NULL bound would filter out every source row; assumes beginning_of_all_times is already set here like end_of_all_times - TODO confirm #} + WHERE {{ ldts }} > (SELECT COALESCE(max({{ load_datetime_col_name}}), {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }}) + FROM {{ this }} + WHERE {{ load_datetime_col_name}} != {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} ) + {%- endif -%} + {% set last_cte = "source_data" -%} ), @@ -244,6 +252,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -251,19 +260,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS "{{ col | upper }}" - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. 
Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -284,25 +317,22 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} @@ -429,63 +459,64 @@ hashed_columns AS ( {%- if enable_ghost_records and not is_incremental() %} {# Creating Ghost Record for unknown case, based on datatype #} unknown_values AS ( + SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, col_size=column.char_size, 
ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col|upper, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col|upper, datatype=column.dtype, col_size=column.char_size, ghost_record_type='unknown') }} + {{ log('pj_relation_columns for '~relation~': ' ~ 
pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -497,62 +528,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , 
end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, col_size=column.char_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col|upper, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in 
vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col|upper, datatype=column.dtype, col_size=column.char_size, ghost_record_type='error') -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -582,12 +612,13 @@ columns_to_select AS ( {%- if enable_ghost_records and not is_incremental() %} UNION ALL + SELECT {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM ghost_records -{%- endif -%} +{% endif %} ) SELECT * FROM columns_to_select diff --git a/macros/staging/fabric/stage.sql b/macros/staging/fabric/stage.sql index 605f6861..ef94f2de 100644 --- a/macros/staging/fabric/stage.sql +++ b/macros/staging/fabric/stage.sql @@ -89,12 +89,11 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = 
datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = derived_column_names + hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} {%- set all_columns = adapter.get_columns_in_relation( source_relation ) -%} - {%- set columns_without_excluded_columns = [] -%} {%- set final_columns_to_select = [] -%} @@ -134,8 +133,11 @@ {%- set only_include_from_source = (derived_input_columns + hashed_input_columns + prejoined_input_columns + ma_keys) | unique | list -%} {%- else -%} + {%- set only_include_from_source = (derived_input_columns + hashed_input_columns + prejoined_input_columns) | unique | list -%} + {%- endif -%} + {%- set source_columns_to_select = only_include_from_source -%} {%- endif-%} @@ -253,28 +255,54 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} +{# Prejoining Business Keys of other source objects for Link purposes #} {%- set final_columns_to_select = (final_columns_to_select + derived_input_columns) | unique | list -%} -{# Prejoining Business Keys of other source objects for Link purposes #} + prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{datavault4dbt.escape_column_names(vals['bk'])}} AS 
{{datavault4dbt.escape_column_names(col)}} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. 
Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ datavault4dbt.escape_column_names(column) }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ datavault4dbt.escape_column_names(prejoin['aliases'][loop.index0]) }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -295,28 +323,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=datavault4dbt.escape_column_names(vals['this_column_name']), prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=datavault4dbt.escape_column_names(vals['ref_column_name'])) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=datavault4dbt.escape_column_names(prejoin['this_column_name']), prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=datavault4dbt.escape_column_names(prejoin['ref_column_name'])) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -443,65 +468,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ 
datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='unknown', col_size=column.char_size) }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='unknown', col_size=column.char_size) }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(col), datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(col), datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' 
~ pj_relation_columns, false ) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} {%- for column in pj_relation_columns -%} - - {%- if column.name|lower == vals['bk']|lower -%} - {{- log('column found? yes, for column :' ~ column.name , false) -}} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='unknown', alias=datavault4dbt.escape_column_names(col)) }} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='unknown', alias=datavault4dbt.escape_column_names(prejoin['aliases'][prejoin_col_index])) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{%- endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column_name), datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column_name), datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, 
+ {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -513,62 +534,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='error', col_size=column.char_size) }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='error', col_size=column.char_size) }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(col), datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ 
datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(col), datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='error', alias=datavault4dbt.escape_column_names(col)) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column.name), datatype=column.dtype, ghost_record_type='error', alias=datavault4dbt.escape_column_names(prejoin['aliases'][prejoin_col_index])) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column_name), datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=datavault4dbt.escape_column_names(column_name), datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -595,6 +615,7 @@ columns_to_select AS ( {{ datavault4dbt.print_list(datavault4dbt.escape_column_names(final_columns_to_select)) }} FROM {{ last_cte }} + {% if enable_ghost_records and not is_incremental() %} UNION ALL diff --git a/macros/staging/oracle/stage.sql b/macros/staging/oracle/stage.sql index c2be1409..cae9e7ea 100644 --- a/macros/staging/oracle/stage.sql +++ b/macros/staging/oracle/stage.sql @@ -96,12 +96,11 @@ 
{# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} {%- set all_columns = adapter.get_columns_in_relation( source_relation ) -%} - {%- set columns_without_excluded_columns = [] -%} {%- set final_columns_to_select = [] -%} @@ -189,6 +188,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. 
#} @@ -263,6 +264,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -270,19 +272,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. 
Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -303,28 +329,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -452,65 +475,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif 
-%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? 
yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not 
loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -523,62 +542,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined 
attributes#} + {% for prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -621,4 +639,4 @@ columns_to_select AS ( SELECT * FROM columns_to_select -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/staging/postgres/stage.sql b/macros/staging/postgres/stage.sql index 9edd3a38..c59cc9c6 100644 --- a/macros/staging/postgres/stage.sql +++ b/macros/staging/postgres/stage.sql @@ -96,12 +96,11 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = 
datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} {%- set all_columns = adapter.get_columns_in_relation( source_relation ) -%} - {%- set columns_without_excluded_columns = [] -%} {%- set final_columns_to_select = [] -%} @@ -184,6 +183,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. #} @@ -256,6 +257,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -263,19 +265,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do 
prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. 
Allowed are: @@ -296,25 +322,22 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} - {%- do exceptions.raise_compiler_error(error_message) -%} + {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. #} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} @@ -444,65 +467,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 
-%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) 
}} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ 
datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -514,62 +533,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if 
datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} - {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} - {%- endif -%} - {%- endfor -%} - {%- if not loop.last -%},{%- endif %} - {% endfor -%} + {% for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=prejoin['aliases'][prejoin_col_index]) }} + {%- endif -%} - {%- endif -%} + {%- endfor -%} + {% endfor -%} + {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} diff --git a/macros/staging/redshift/stage.sql b/macros/staging/redshift/stage.sql index a7704c03..b9861cb1 100644 --- a/macros/staging/redshift/stage.sql +++ b/macros/staging/redshift/stage.sql @@ -95,7 +95,7 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = 
datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} @@ -183,6 +183,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. #} @@ -255,26 +257,51 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( SELECT - {% if final_columns_to_select | length > 0 -%} + {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) 
-%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -295,28 +322,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -455,65 +479,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, - {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column and derived_columns #} + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} + {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ 
datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? 
yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} {# Aliases are optional in the list syntax; fall back to the extract column name when none are given #} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ 
datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -525,62 +545,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin 
in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.data_type, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} {# Aliases are optional in the list syntax; fall back to the extract column name when none are given #} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} diff --git a/macros/staging/snowflake/stage.sql b/macros/staging/snowflake/stage.sql index 956c632e..14810ed4 100644 --- a/macros/staging/snowflake/stage.sql +++ b/macros/staging/snowflake/stage.sql @@ -96,7 +96,7 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = 
datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} @@ -189,6 +189,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. #} @@ -263,6 +265,7 @@ missing_columns AS ( ), {%- endif -%} + {%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( @@ -270,19 +273,43 @@ prejoined_columns AS ( SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are 
extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -303,28 +330,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -395,65 +419,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% 
endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{- datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? 
yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} {# Aliases are optional in the list syntax; fall back to the extract column name when none are given #} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} - {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns; col_size is passed through as before so sized datatypes keep their length #} + {% for column_name, properties in derived_columns_with_datatypes_DICT.items() %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + 
,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -465,62 +485,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for 
prejoin in prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} {# Aliases are optional in the list syntax; fall back to the extract column name when none are given #} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=(prejoin['aliases'][prejoin_col_index] if datavault4dbt.is_something(prejoin['aliases']) else prejoin['extract_columns'][prejoin_col_index])) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns; col_size is passed through as before so sized datatypes keep their length #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} diff --git a/macros/staging/stage.sql b/macros/staging/stage.sql index 76b17ed0..403df72c 100644 --- a/macros/staging/stage.sql +++ b/macros/staging/stage.sql @@ -120,6 +120,11 @@ {%- if datavault4dbt.is_nothing(ldts) -%} {%- set ldts = datavault4dbt.current_timestamp() -%} {%- endif -%} + + {# To parse the list syntax of prejoined columns #} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {%- set prejoined_columns = datavault4dbt.process_prejoined_columns(prejoined_columns) -%} + {%- endif -%} {{- adapter.dispatch('stage', 
'datavault4dbt')(include_source_columns=include_source_columns, ldts=ldts, diff --git a/macros/internal/helpers/stage_processing_macros.sql b/macros/staging/stage_processing_macros.sql similarity index 50% rename from macros/internal/helpers/stage_processing_macros.sql rename to macros/staging/stage_processing_macros.sql index 6ecf2676..3f77d594 100644 --- a/macros/internal/helpers/stage_processing_macros.sql +++ b/macros/staging/stage_processing_macros.sql @@ -54,24 +54,27 @@ {# Do nothing. No source column required. #} {%- elif value is mapping and value.is_hashdiff -%} {%- do extracted_input_columns.append(value['columns']) -%} - {%- elif value is mapping and 'this_column_name' in value.keys() -%} - {%- if datavault4dbt.is_list(value['this_column_name'])-%} - {%- for column in value['this_column_name'] -%} - {%- do extracted_input_columns.append(column) -%} - {%- endfor -%} - {%- else -%} - {%- do extracted_input_columns.append(value['this_column_name']) -%} - {%- endif -%} {%- else -%} {%- do extracted_input_columns.append(value) -%} {%- endif -%} {%- endfor -%} - - {%- do return(extracted_input_columns) -%} + + {%- elif datavault4dbt.is_list(columns_dict) -%} + {% for prejoin in columns_dict %} + {%- if datavault4dbt.is_list(prejoin['this_column_name'])-%} + {%- for column in prejoin['this_column_name'] -%} + {%- do extracted_input_columns.append(column) -%} + {%- endfor -%} + {%- else -%} + {%- do extracted_input_columns.append(prejoin['this_column_name']) -%} + {%- endif -%} + {% endfor %} {%- else -%} {%- do return([]) -%} {%- endif -%} + {%- do return(extracted_input_columns) -%} + {%- endmacro -%} @@ -123,4 +126,89 @@ {%- endif %} {%- endfor -%} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} + + +{%- macro process_prejoined_columns(prejoined_columns=none) -%} + {# Check if the old syntax is used for prejoined columns + If so parse it to new list syntax #} + + {% if datavault4dbt.is_list(prejoined_columns) %} + {% do 
return(prejoined_columns) %} + {% else %} + {% set output = [] %} + + {% for key, value in prejoined_columns.items() %} + {% set ref_model = value.get('ref_model') %} + {% set src_name = value.get('src_name') %} + {% set src_table = value.get('src_table') %} + {%- if 'operator' not in value.keys() -%} + {%- do value.update({'operator': 'AND'}) -%} + {%- set operator = 'AND' -%} + {%- else -%} + {%- set operator = value.get('operator') -%} + {%- endif -%} + + {% set match_criteria = ( + ref_model and output | selectattr('ref_model', 'equalto', ref_model) or + src_name and output | selectattr('src_name', 'equalto', src_name) | selectattr('src_table', 'equalto', src_table) + ) | selectattr('this_column_name', 'equalto', value.this_column_name) + | selectattr('ref_column_name', 'equalto', value.ref_column_name) + | selectattr('operator', 'equalto', value.operator) + | list | first %} + + {% if match_criteria %} + {% do match_criteria['extract_columns'].append(value.bk) %} + {% do match_criteria['aliases'].append(key) %} + {% else %} + {% set new_item = { + 'extract_columns': [value.bk], + 'aliases': [key], + 'this_column_name': value.this_column_name, + 'ref_column_name': value.ref_column_name, + 'operator': operator + } %} + + {% if ref_model %} + {% do new_item.update({'ref_model': ref_model}) %} + {% elif src_name and src_table %} + {% do new_item.update({'src_name': src_name, 'src_table': src_table}) %} + {% endif %} + + {% do output.append(new_item) %} + {% endif %} + {% endfor %} + {% endif %} + + {%- do return(output) -%} + +{%- endmacro -%} + + +{%- macro extract_prejoin_column_names(prejoined_columns=none) -%} + + {%- set extracted_column_names = [] -%} + + {% if not datavault4dbt.is_something(prejoined_columns) %} + {%- do return(extracted_column_names) -%} + {% endif %} + + {% for prejoin in prejoined_columns %} + {% if datavault4dbt.is_list(prejoin['aliases']) %} + {% for alias in prejoin['aliases'] %} + {%- do extracted_column_names.append(alias) -%} + {% 
endfor %} + {% elif datavault4dbt.is_something(prejoin['aliases']) %} + {%- do extracted_column_names.append(prejoin['aliases']) -%} + {% elif datavault4dbt.is_list(prejoin['extract_columns']) %} + {% for column in prejoin['extract_columns'] %} + {%- do extracted_column_names.append(column) -%} + {% endfor %} + {% else %} + {%- do extracted_column_names.append(prejoin['extract_columns']) -%} + {% endif %} + {%- endfor -%} + + {%- do return(extracted_column_names) -%} + +{%- endmacro -%} diff --git a/macros/staging/staging.yml b/macros/staging/staging.yml new file mode 100644 index 00000000..86988615 --- /dev/null +++ b/macros/staging/staging.yml @@ -0,0 +1,23 @@ +version: 2 + +macros: + - name: process_prejoined_columns + description: > + A macro to process prejoined columns. If a list of dictionaries (new syntax) is provided it will do nothing and return the list. + If a dictionary of dictionaries is provided (old syntax) it will be transformed to the new syntax. + When multiple columns are to be extracted from the same prejoin-target and with the same conditions (columns and operator) they will be combined into one item. + arguments: + - name: prejoined_columns + type: list or dictionary + description: The value of the prejoined_columns as defined in the yaml_metadata of the stage-model. + + - name: extract_prejoin_column_names + description: > + A macro to extract the names of the prejoined columns of each staging-model. + Takes a list of prejoins and will add the aliases of the prejoins to the return-list. + If no aliases are present it will return the names of the extracted columns. + Returns an empty list if the passed parameter is empty. 
+ arguments: + - name: prejoined_columns + type: list + description: The prejoined_columns as processed by the process_prejoined_columns-macro \ No newline at end of file diff --git a/macros/staging/synapse/stage.sql b/macros/staging/synapse/stage.sql index 84edee88..77c51daa 100644 --- a/macros/staging/synapse/stage.sql +++ b/macros/staging/synapse/stage.sql @@ -89,7 +89,7 @@ {# Getting the column names for all additional columns #} {%- set derived_column_names = datavault4dbt.extract_column_names(derived_columns) -%} {%- set hashed_column_names = datavault4dbt.extract_column_names(hashed_columns) -%} -{%- set prejoined_column_names = datavault4dbt.extract_column_names(prejoined_columns) -%} +{%- set prejoined_column_names = datavault4dbt.extract_prejoin_column_names(prejoined_columns) -%} {%- set missing_column_names = datavault4dbt.extract_column_names(missing_columns) -%} {%- set exclude_column_names = derived_column_names + hashed_column_names + prejoined_column_names + missing_column_names + ldts_rsrc_input_column_names %} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | unique | list -%} @@ -179,6 +179,8 @@ {# Setting the ldts default datatype #} {% set ldts_default_dtype = datavault4dbt.timestamp_default_dtype() %} +{{ datavault4dbt.prepend_generated_by() }} + WITH {# Selecting everything that we need from the source relation. 
#} @@ -255,28 +257,53 @@ missing_columns AS ( ), {%- endif -%} -{%- if datavault4dbt.is_something(prejoined_columns) %} -{%- set final_columns_to_select = (final_columns_to_select + derived_input_columns) | unique | list -%} +{%- if datavault4dbt.is_something(prejoined_columns) %} {# Prejoining Business Keys of other source objects for Link purposes #} prejoined_columns AS ( +{%- set final_columns_to_select = (final_columns_to_select + derived_input_columns) | unique | list -%} + SELECT {% if final_columns_to_select | length > 0 -%} {{ datavault4dbt.print_list(datavault4dbt.prefix(columns=datavault4dbt.escape_column_names(final_columns_to_select), prefix_str='lcte').split(',')) }} - {% endif %} - {%- for col, vals in prejoined_columns.items() -%} - ,pj_{{loop.index}}.{{ vals['bk'] }} AS {{ col }} - {% endfor -%} + {%- endif -%} + + {# Iterate over each prejoin, doing logic checks and generating the select-statements #} + {%- for prejoin in prejoined_columns -%} + {%- set prejoin_alias = 'pj_' + loop.index|string -%} + + {# If extract_columns and/or aliases are passed as string convert them to a list so they can be used as iterators later #} + {%- if not datavault4dbt.is_list(prejoin['extract_columns'])-%} + {%- do prejoin.update({'extract_columns': [prejoin['extract_columns']]}) -%} + {%- endif -%} + {%- if not datavault4dbt.is_list(prejoin['aliases']) and datavault4dbt.is_something(prejoin['aliases']) -%} + {%- do prejoin.update({'aliases': [prejoin['aliases']]}) -%} + {%- endif -%} + + {# If passed, make sure there are as many aliases as there are extract_columns, ensuring a 1:1 mapping #} + {%- if datavault4dbt.is_something(prejoin['aliases']) -%} + {%- if not prejoin['aliases']|length == prejoin['extract_columns']|length -%} + {%- do exceptions.raise_compiler_error("Prejoin aliases must have the same length as extract_columns. 
Got " + ~ prejoin['extract_columns']|length ~ " extract_column(s) and " ~ prejoin['aliases']|length ~ " aliase(s).") -%} + {%- endif -%} + {%- endif -%} + + {# Generate the columns for the SELECT-statement #} + {%- for column in prejoin['extract_columns'] %} + ,{{ prejoin_alias }}.{{ column }} {% if datavault4dbt.is_something(prejoin['aliases']) -%} AS {{ prejoin['aliases'][loop.index0] }} {% endif -%} + {%- endfor -%} + {%- endfor %} FROM {{ last_cte }} lcte - {% for col, vals in prejoined_columns.items() %} + {# Iterate over prejoins and generate the join-statements #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- else -%} {%- set error_message -%} Prejoin error: Invalid target entity definition. Allowed are: @@ -297,28 +324,25 @@ prejoined_columns AS ( ref_column_name: join_columns_in_ref_model Got: - {{ col }}: {{ vals }} + {{ prejoin }} {%- endset -%} {%- do exceptions.raise_compiler_error(error_message) -%} {%- endif -%} -{# This sets a default value for the operator that connects multiple joining conditions. Only when it is not set by user. 
#} - {%- if 'operator' not in vals.keys() -%} + {%- if 'operator' not in prejoin.keys() -%} {%- set operator = 'AND' -%} {%- else -%} - {%- set operator = vals['operator'] -%} + {%- set operator = prejoin['operator'] -%} {%- endif -%} - - {%- set prejoin_alias = 'pj_' + loop.index|string -%} - - left join {{ relation }} as {{ prejoin_alias }} - on {{ datavault4dbt.multikey(columns=vals['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=vals['ref_column_name']) }} - - {% endfor %} + {%- set prejoin_alias = 'pj_' + loop.index|string %} + + left join {{ relation }} as {{ prejoin_alias }} + on {{ datavault4dbt.multikey(columns=prejoin['this_column_name'], prefix=['lcte', prejoin_alias], condition='=', operator=operator, right_columns=prejoin['ref_column_name']) }} + {%- endfor -%} {% set last_cte = "prejoined_columns" -%} - {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names %} + {%- set final_columns_to_select = final_columns_to_select + prejoined_column_names -%} ), {%- endif -%} @@ -445,65 +469,61 @@ unknown_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ unknown_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format, beginning_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ unknown_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for all source columns, except the ldts, rsrc & edwSequence column #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', col_size=column.char_size) }} - 
{%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', col_size=column.char_size) }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes#} - {% for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes #} + {%- for prejoin in prejoined_columns -%} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {%- set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} - {{ log('pj_relation_columns: ' ~ pj_relation_columns, false ) }} - - {% for column in pj_relation_columns -%} - - {% if column.name|lower == vals['bk']|lower -%} - {{ log('column found? 
yes, for column :' ~ column.name , false) }} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=col) }} + {{ log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) }} + + {%- for column in pj_relation_columns -%} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='unknown', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} {%- endfor -%} - {%- if not loop.last %},{% endif %} {% endfor -%} {%- endif %} - {%- if datavault4dbt.is_something(derived_columns) -%}, - {# Additionally generating Ghost Records for Derived Columns #} + {%- if datavault4dbt.is_something(derived_columns) -%} + {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='unknown') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=unknown_key) }} 
as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%} @@ -515,62 +535,61 @@ error_values AS ( SELECT - {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }}, - '{{ error_value_rsrc }}' as {{ record_source_col_name }} + {{ datavault4dbt.string_to_timestamp(timestamp_format , end_of_all_times) }} as {{ load_datetime_col_name }} + ,'{{ error_value_rsrc }}' as {{ record_source_col_name }} - {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%}, + {%- if columns_without_excluded_columns is defined and columns_without_excluded_columns| length > 0 -%} {# Generating Ghost Records for Source Columns #} {%- for column in columns_without_excluded_columns %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', col_size=column.char_size) }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', col_size=column.char_size) }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(missing_columns) -%}, + {%- if datavault4dbt.is_something(missing_columns) -%} {# Additionally generating ghost record for Missing columns #} {%- for col, dtype in missing_columns.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} - {%- if not loop.last %},{% endif -%} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=col, datatype=dtype, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(prejoined_columns) -%}, - {# Additionally generating ghost records for the prejoined attributes #} - {%- for col, vals in prejoined_columns.items() %} + {%- if datavault4dbt.is_something(prejoined_columns) -%} + {# Additionally generating ghost records for the prejoined attributes#} + {% for prejoin in 
prejoined_columns %} - {%- if 'src_name' in vals.keys() or 'src_table' in vals.keys() -%} - {%- set relation = source(vals['src_name']|string, vals['src_table']) -%} - {%- elif 'ref_model' in vals.keys() -%} - {%- set relation = ref(vals['ref_model']) -%} + {%- if 'ref_model' in prejoin.keys() -%} + {% set relation = ref(prejoin['ref_model']) -%} + {%- elif 'src_name' in prejoin.keys() and 'src_table' in prejoin.keys() -%} + {%- set relation = source(prejoin['src_name']|string, prejoin['src_table']) -%} {%- endif -%} {%- set pj_relation_columns = adapter.get_columns_in_relation( relation ) -%} + {{- log('pj_relation_columns for '~relation~': ' ~ pj_relation_columns, false ) -}} {% for column in pj_relation_columns -%} - {% if column.name|lower == vals['bk']|lower -%} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=col) -}} + {%- if column.name|lower in prejoin['extract_columns']|map('lower') -%} + {%- set prejoin_extract_cols_lower = prejoin['extract_columns']|map('lower')|list -%} + {%- set prejoin_col_index = prejoin_extract_cols_lower.index(column.name|lower) -%} + {{ log('column found? 
yes, for column: ' ~ column.name , false) }} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column.name, datatype=column.dtype, ghost_record_type='error', alias=prejoin['aliases'][prejoin_col_index]) }} {%- endif -%} + {%- endfor -%} - {%- if not loop.last -%},{%- endif %} {% endfor -%} + {%- endif %} - {%- endif -%} - - {%- if datavault4dbt.is_something(derived_columns) %}, + {%- if datavault4dbt.is_something(derived_columns) %} {# Additionally generating Ghost Records for Derived Columns #} {%- for column_name, properties in derived_columns_with_datatypes_DICT.items() %} - {{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} - {%- if not loop.last %},{% endif %} + ,{{ datavault4dbt.ghost_record_per_datatype(column_name=column_name, datatype=properties.datatype, col_size=properties.col_size, ghost_record_type='error') }} {%- endfor -%} {%- endif -%} - {%- if datavault4dbt.is_something(processed_hash_columns) -%}, + {%- if datavault4dbt.is_something(processed_hash_columns) -%} {%- for hash_column in processed_hash_columns %} - CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} - {%- if not loop.last %},{% endif %} + ,CAST({{ datavault4dbt.as_constant(column_str=error_key) }} as {{ hash_dtype }}) as {{ hash_column }} {%- endfor -%} {%- endif -%}